commit 1be7bb9ae79e1ec8a1628c35510990d3380004ae from: Aleksey Ryndin date: Thu Sep 05 12:46:28 2024 UTC Fix: render links after paragraph commit - 1b9c150021811fd083143dcb0ca5af1a868e4abc commit + 1be7bb9ae79e1ec8a1628c35510990d3380004ae blob - add410e2be395f7e62544825886e4c8678087dd4 blob + 521ba840c59574f7e97590a04a0de2e0c35bf400 --- lonk.py +++ lonk.py @@ -50,36 +50,36 @@ class ParagraphTag(_BaseTag): def __init__(self, tag, attrs): super().__init__(tag, attrs) self.content = [] + self.footer = [] def on_data(self, data): self.content.append(data.strip()) def flush(self): rv = " ".join(" ".join(data.split()) for data in self.content if data) + footer = self.footer self.content = [] - return rv + "\n" if rv else "" + self.footer = [] + return "\n".join([rv] + footer) + "\n" if rv else "" class LinkTag(_BaseTag): - def __init__(self, tag, attrs): + def __init__(self, href, paragraph, tag, attrs): super().__init__(tag, attrs) - self.href = None + self.href = href + self.paragraph = paragraph self.content = [] - for attr_name, attr_value in attrs: - if attr_name.lower() == "href": - self.href = urljoin(_build_url(), attr_value) def on_data(self, data): + if not self.content: + self.paragraph.on_data(f"↓") + self.paragraph.on_data(data) self.content.append(data.strip()) def flush(self): - rv = "" - if self.href: - text = " ".join(" ".join(data.split()) for data in self.content if data) - rv = f"=> {self.href} {text}" - self.href = None - self.content = [] - return rv + text = " ".join(" ".join(data.split()) for data in self.content if data) + self.paragraph.footer.append(f"=> {self.href} {text}") + return "" class LitItemTag(ParagraphTag): @@ -116,15 +116,6 @@ class PreformattedTag(_BaseTag): return f"```\n{rv}\n```\n" if rv else "" -_TAGS_IN_TEXT = { - "rb", "ruby", "rp", "rt", - "b", "strong", "i", "em", "s", - "mark", "small", "del", "ins", "sub", "sup", "u", - "code", "kbd", "time", "wbr", "dfn", "tt", "big", - "table", "thead", "tbody", "th", "tfoot", "td", 
"colgroup", "col", "caption", -} - - class HtmlToGmi(HTMLParser): def __init__(self): super().__init__() @@ -158,7 +149,9 @@ class HtmlToGmi(HTMLParser): elif tag in {"blockquote", "q"}: _push(QuoteTag(tag, attrs)) elif tag == "a": - _push(LinkTag(tag, attrs)) + href = {attr_name: attr_value for attr_name, attr_value in attrs}.get("href") + if href: + self.stack.append(LinkTag(href, self._get_current_paragraph(), tag, attrs)) elif tag == "img": img = {attr_name: attr_value for attr_name, attr_value in attrs} title = img.get("title") or "" @@ -180,7 +173,15 @@ class HtmlToGmi(HTMLParser): if self.stack and tag == self.stack[-1].tag: self.gmi_text.append(self.stack.pop().flush()) + def _get_current_paragraph(self): + for elem in reversed(self.stack): + if isinstance(elem, ParagraphTag): + return elem + self.stack = [ParagraphTag("p", [])] + self.stack + return self.stack[0] + + def _format_honk(honk): assert honk["Format"] == "html", honk assert honk["Noise"] == honk["HTML"], honk blob - 82b65b146819d13416f37b7f0be522f51fceaeda blob + faa530310b306953874fd22e5d94bf1a60f1cd22 --- tests.py +++ tests.py @@ -3,11 +3,18 @@ from lonk import HtmlToGmi class TestHtmlToGmi(TestCase): + def test_links_in_paragraph(self): + html = '

<p>head <a href="https://127.0.0.1">https link</a> <a href="gemini://127.0.0.1">gemini link</a> tail</p>

' + self.assertEqual(HtmlToGmi().feed(html), """\ +head ↓ https link ↓ gemini link tail +=> https://127.0.0.1 https link +=> gemini://127.0.0.1 gemini link +""") + def test_in_text_tag(self): html = "

bold text

" self.assertEqual(HtmlToGmi().feed(html), "bold text\n") - def test_img_emu(self): html = "aa bb

" self.assertEqual(HtmlToGmi().feed(html), "aa :blobcatgooglyshrug: bb\n") @@ -29,7 +36,6 @@ Paragraph 1 Paragraph 1.1 """) - def test_html2gmi_pre(self): html = """\
@@ -50,7 +56,6 @@ def fib(n):
 ```
 """)
 
-
     def test_html2gmi_description_list(self):
         html = """