commit 1f653a9870bcdabc5ca4dbbb3041ec60b4687b7a
from: Aleksey Ryndin
date: Thu Sep 05 13:43:27 2024 UTC
Fix: render relative links
commit - 1be7bb9ae79e1ec8a1628c35510990d3380004ae
commit + 1f653a9870bcdabc5ca4dbbb3041ec60b4687b7a
blob - 521ba840c59574f7e97590a04a0de2e0c35bf400
blob + 23c3b5ca275d13889f071c1d91d57e8c7d8237bc
--- lonk.py
+++ lonk.py
@@ -72,7 +72,7 @@ class LinkTag(_BaseTag):
def on_data(self, data):
if not self.content:
- self.paragraph.on_data(f"↓")
+ self.paragraph.on_data("↳")
self.paragraph.on_data(data)
self.content.append(data.strip())
@@ -117,10 +117,11 @@ class PreformattedTag(_BaseTag):
class HtmlToGmi(HTMLParser):
- def __init__(self):
+ def __init__(self, base_url):
super().__init__()
self.gmi_text = []
self.stack = []
+ self.base_url = base_url
def feed(self, html_text):
super().feed(html_text)
@@ -151,7 +152,7 @@ class HtmlToGmi(HTMLParser):
elif tag == "a":
href = {attr_name: attr_value for attr_name, attr_value in attrs}.get("href")
if href:
- self.stack.append(LinkTag(href, self._get_current_paragraph(), tag, attrs))
+ self.stack.append(LinkTag(urljoin(self.base_url, href), self._get_current_paragraph(), tag, attrs))
elif tag == "img":
img = {attr_name: attr_value for attr_name, attr_value in attrs}
title = img.get("title") or ""
@@ -160,7 +161,7 @@ class HtmlToGmi(HTMLParser):
else:
src = img.get("src")
if src:
- self.gmi_text.append(f"=> {src} {title}")
+ self.gmi_text.append(f"=> {urljoin(self.base_url, src)} {title}")
def handle_data(self, data):
# print(f" . {data=}")
@@ -186,7 +187,7 @@ def _format_honk(honk):
assert honk["Format"] == "html", honk
assert honk["Noise"] == honk["HTML"], honk
firts_line = f'## From: {honk.get("Oondle") or honk["Handle"]}, {honk["Date"]}'
- lines = [firts_line, f'=> {honk["XID"]}', HtmlToGmi().feed(honk["Noise"])]
+ lines = [firts_line, f'=> {honk["XID"]}', HtmlToGmi(_build_url()).feed(honk["Noise"])]
for donk in (honk.get("Donks") or []):
lines.append(f'=> {donk["URL"]}')
blob - faa530310b306953874fd22e5d94bf1a60f1cd22
blob + 645ae6bd91a6c1d866d6cfd9f1b19c544531bca7
--- tests.py
+++ tests.py
@@ -3,21 +3,32 @@ from lonk import HtmlToGmi
class TestHtmlToGmi(TestCase):
+ def test_img_relative(self):
+ html = ''
+ self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), "=> https://localhost/d/xxxx.jpg yyy")
+
+ def test_link_relative(self):
+ html = 'head https link tail
'
+ self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), """\
+head ↳ https link tail
+=> https://localhost/sub1/1 https link
+""")
+
def test_links_in_paragraph(self):
html = 'head https link gemini link tail
'
- self.assertEqual(HtmlToGmi().feed(html), """\
-head ↓ https link ↓ gemini link tail
+ self.assertEqual(HtmlToGmi("").feed(html), """\
+head ↳ https link ↳ gemini link tail
=> https://127.0.0.1 https link
=> gemini://127.0.0.1 gemini link
""")
def test_in_text_tag(self):
html = "bold text
"
- self.assertEqual(HtmlToGmi().feed(html), "bold text\n")
+ self.assertEqual(HtmlToGmi("").feed(html), "bold text\n")
def test_img_emu(self):
html = "aa bb
"
- self.assertEqual(HtmlToGmi().feed(html), "aa :blobcatgooglyshrug: bb\n")
+ self.assertEqual(HtmlToGmi("").feed(html), "aa :blobcatgooglyshrug: bb\n")
def test_html2gmi_header(self):
html = """\
@@ -26,7 +37,7 @@ head ↓ https link ↓ gemini link tail
Header 1.1
Paragraph 1.1
"""
- self.assertEqual(HtmlToGmi().feed(html), """\
+ self.assertEqual(HtmlToGmi("").feed(html), """\
# Header 1
Paragraph 1
@@ -45,7 +56,7 @@ def fib(n):
return fib(n - 1) + fib(n - 1)
"""
- self.assertEqual(HtmlToGmi().feed(html), """\
+ self.assertEqual(HtmlToGmi("").feed(html), """\
```
def fib(n):
@@ -65,7 +76,7 @@ def fib(n):
White cold drink
"""
- self.assertEqual(HtmlToGmi().feed(html), """\
+ self.assertEqual(HtmlToGmi("").feed(html), """\
* Coffee
Black hot drink