commit 1f653a9870bcdabc5ca4dbbb3041ec60b4687b7a from: Aleksey Ryndin date: Thu Sep 05 13:43:27 2024 UTC Fix: render relative links commit - 1be7bb9ae79e1ec8a1628c35510990d3380004ae commit + 1f653a9870bcdabc5ca4dbbb3041ec60b4687b7a blob - 521ba840c59574f7e97590a04a0de2e0c35bf400 blob + 23c3b5ca275d13889f071c1d91d57e8c7d8237bc --- lonk.py +++ lonk.py @@ -72,7 +72,7 @@ class LinkTag(_BaseTag): def on_data(self, data): if not self.content: - self.paragraph.on_data(f"↓") + self.paragraph.on_data("↳") self.paragraph.on_data(data) self.content.append(data.strip()) @@ -117,10 +117,11 @@ class PreformattedTag(_BaseTag): class HtmlToGmi(HTMLParser): - def __init__(self): + def __init__(self, base_url): super().__init__() self.gmi_text = [] self.stack = [] + self.base_url = base_url def feed(self, html_text): super().feed(html_text) @@ -151,7 +152,7 @@ class HtmlToGmi(HTMLParser): elif tag == "a": href = {attr_name: attr_value for attr_name, attr_value in attrs}.get("href") if href: - self.stack.append(LinkTag(href, self._get_current_paragraph(), tag, attrs)) + self.stack.append(LinkTag(urljoin(self.base_url, href), self._get_current_paragraph(), tag, attrs)) elif tag == "img": img = {attr_name: attr_value for attr_name, attr_value in attrs} title = img.get("title") or "" @@ -160,7 +161,7 @@ class HtmlToGmi(HTMLParser): else: src = img.get("src") if src: - self.gmi_text.append(f"=> {src} {title}") + self.gmi_text.append(f"=> {urljoin(self.base_url, src)} {title}") def handle_data(self, data): # print(f" . {data=}") @@ -186,7 +187,7 @@ def _format_honk(honk): assert honk["Format"] == "html", honk assert honk["Noise"] == honk["HTML"], honk firts_line = f'## From: {honk.get("Oondle") or honk["Handle"]}, {honk["Date"]}' - lines = [firts_line, f'=> {honk["XID"]}', HtmlToGmi().feed(honk["Noise"])] + lines = [firts_line, f'=> {honk["XID"]}', HtmlToGmi(_build_url()).feed(honk["Noise"])] for donk in (honk.get("Donks") or []): lines.append(f'=> {donk["URL"]}') blob - faa530310b306953874fd22e5d94bf1a60f1cd22 blob + 645ae6bd91a6c1d866d6cfd9f1b19c544531bca7 --- tests.py +++ tests.py @@ -3,21 +3,32 @@ from lonk import HtmlToGmi class TestHtmlToGmi(TestCase): + def test_img_realtive(self): + html = '' + self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), "=> https://localhost/d/xxxx.jpg yyy") + + def test_link_realtive(self): + html = '

head https link tail

' + self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), """\ +head ↳ https link tail +=> https://localhost/sub1/1 https link +""") + def test_links_in_paragraph(self): html = '

head https link gemini link tail

' - self.assertEqual(HtmlToGmi().feed(html), """\ -head ↓ https link ↓ gemini link tail + self.assertEqual(HtmlToGmi("").feed(html), """\ +head ↳ https link ↳ gemini link tail => https://127.0.0.1 https link => gemini://127.0.0.1 gemini link """) def test_in_text_tag(self): html = "

bold text

" - self.assertEqual(HtmlToGmi().feed(html), "bold text\n") + self.assertEqual(HtmlToGmi("").feed(html), "bold text\n") def test_img_emu(self): html = "aa bb

" - self.assertEqual(HtmlToGmi().feed(html), "aa :blobcatgooglyshrug: bb\n") + self.assertEqual(HtmlToGmi("").feed(html), "aa :blobcatgooglyshrug: bb\n") def test_html2gmi_header(self): html = """\ @@ -26,7 +37,7 @@ head ↓ https link ↓ gemini link tail

Header 1.1

Paragraph 1.1

""" - self.assertEqual(HtmlToGmi().feed(html), """\ + self.assertEqual(HtmlToGmi("").feed(html), """\ # Header 1 Paragraph 1 @@ -45,7 +56,7 @@ def fib(n): return fib(n - 1) + fib(n - 1) """ - self.assertEqual(HtmlToGmi().feed(html), """\ + self.assertEqual(HtmlToGmi("").feed(html), """\ ``` def fib(n): @@ -65,7 +76,7 @@ def fib(n):
White cold drink
""" - self.assertEqual(HtmlToGmi().feed(html), """\ + self.assertEqual(HtmlToGmi("").feed(html), """\ * Coffee Black hot drink