Commit Diff


commit - 1b9c150021811fd083143dcb0ca5af1a868e4abc
commit + 1be7bb9ae79e1ec8a1628c35510990d3380004ae
blob - add410e2be395f7e62544825886e4c8678087dd4
blob + 521ba840c59574f7e97590a04a0de2e0c35bf400
--- lonk.py
+++ lonk.py
@@ -50,36 +50,36 @@ class ParagraphTag(_BaseTag):
     def __init__(self, tag, attrs):
         super().__init__(tag, attrs)
         self.content = []
+        self.footer = []
 
     def on_data(self, data):
         self.content.append(data.strip())
 
     def flush(self):
         rv = " ".join(" ".join(data.split()) for data in self.content if data)
+        footer = self.footer  # link lines queued by LinkTag.flush for this paragraph
         self.content = []
-        return rv + "\n" if rv else ""
+        self.footer = []
+        return "\n".join(([rv] if rv else []) + footer) + "\n" if rv or footer else ""  # keep link lines even when the paragraph has no text
 
 
 class LinkTag(_BaseTag):
-    def __init__(self, tag, attrs):
+    def __init__(self, href, paragraph, tag, attrs):
         super().__init__(tag, attrs)
-        self.href = None
+        self.href = href
+        self.paragraph = paragraph
         self.content = []
-        for attr_name, attr_value in attrs:
-            if attr_name.lower() == "href":
-                self.href = urljoin(_build_url(), attr_value)
 
     def on_data(self, data):
+        if not self.content:  # first chunk of this link: mark its position in the paragraph once
+            self.paragraph.on_data("↓")
+        self.paragraph.on_data(data)  # mirror every chunk, not only the first, into the paragraph text
         self.content.append(data.strip())
 
     def flush(self):
-        rv = ""
-        if self.href:
-            text = " ".join(" ".join(data.split()) for data in self.content if data)
-            rv = f"=> {self.href} {text}"
-        self.href = None
-        self.content = []
-        return rv
+        text = " ".join(" ".join(data.split()) for data in self.content if data)
+        self.paragraph.footer.append(f"=> {self.href} {text}")
+        return ""
 
 
 class LitItemTag(ParagraphTag):
@@ -116,15 +116,6 @@ class PreformattedTag(_BaseTag):
         return f"```\n{rv}\n```\n" if rv else ""
 
 
-_TAGS_IN_TEXT = {
-    "rb", "ruby", "rp", "rt",
-    "b", "strong", "i", "em", "s", 
-    "mark", "small", "del", "ins", "sub", "sup", "u", 
-    "code", "kbd", "time", "wbr", "dfn", "tt", "big",
-    "table", "thead", "tbody", "th", "tfoot", "td", "colgroup", "col", "caption",
-}
-
-
 class HtmlToGmi(HTMLParser):
     def __init__(self):
         super().__init__()
@@ -158,7 +149,9 @@ class HtmlToGmi(HTMLParser):
         elif tag in {"blockquote", "q"}:
             _push(QuoteTag(tag, attrs))
         elif tag == "a":
-            _push(LinkTag(tag, attrs))
+            href = {attr_name: attr_value for attr_name, attr_value in attrs}.get("href")
+            if href:
+                self.stack.append(LinkTag(href, self._get_current_paragraph(), tag, attrs))
         elif tag == "img":
             img = {attr_name: attr_value for attr_name, attr_value in attrs}
             title = img.get("title") or ""
@@ -180,7 +173,15 @@ class HtmlToGmi(HTMLParser):
         if self.stack and tag == self.stack[-1].tag:
             self.gmi_text.append(self.stack.pop().flush())
 
+    def _get_current_paragraph(self):  # return the innermost open ParagraphTag, creating one if none is open
+        for elem in reversed(self.stack):  # search from the top of the stack downwards
+            if isinstance(elem, ParagraphTag):  # ParagraphTag subclasses match as well
+                return elem
 
+        self.stack = [ParagraphTag("p", [])] + self.stack  # none open: put an implicit paragraph at the bottom of the stack
+        return self.stack[0]
+
+
 def _format_honk(honk):
     assert honk["Format"] == "html", honk
     assert honk["Noise"] == honk["HTML"], honk
blob - 82b65b146819d13416f37b7f0be522f51fceaeda
blob + faa530310b306953874fd22e5d94bf1a60f1cd22
--- tests.py
+++ tests.py
@@ -3,11 +3,18 @@ from lonk import HtmlToGmi
 
 
 class TestHtmlToGmi(TestCase):
+    def test_links_in_paragraph(self):
+        html = '<p>head <a href="https://127.0.0.1">https link</a> <a href="gemini://127.0.0.1">gemini link</a> tail</p>'
+        self.assertEqual(HtmlToGmi().feed(html), """\
+head ↓ https link ↓ gemini link tail
+=> https://127.0.0.1 https link
+=> gemini://127.0.0.1 gemini link
+""")
+
     def test_in_text_tag(self):
         html = "<p><b>bold</b> text</p>"
         self.assertEqual(HtmlToGmi().feed(html), "bold text\n")
 
-
     def test_img_emu(self):
         html = "aa <img class=\"emu\" title=\":blobcatgooglyshrug:\" src=\"/d/6ytBYw515CvqFJZ8N2.png\"> bb</p>"
         self.assertEqual(HtmlToGmi().feed(html), "aa :blobcatgooglyshrug: bb\n")
@@ -29,7 +36,6 @@ Paragraph 1
 Paragraph 1.1
 """)
 
-
     def test_html2gmi_pre(self):
         html = """\
 <pre>
@@ -50,7 +56,6 @@ def fib(n):
 ```
 """)
 
-
     def test_html2gmi_description_list(self):
         html = """
             <dl>