commit - 1b9c150021811fd083143dcb0ca5af1a868e4abc
commit + 1be7bb9ae79e1ec8a1628c35510990d3380004ae
blob - add410e2be395f7e62544825886e4c8678087dd4
blob + 521ba840c59574f7e97590a04a0de2e0c35bf400
--- lonk.py
+++ lonk.py
def __init__(self, tag, attrs):
super().__init__(tag, attrs)
self.content = []
+ self.footer = []
def on_data(self, data):
self.content.append(data.strip())
def flush(self):
rv = " ".join(" ".join(data.split()) for data in self.content if data)
+ footer = self.footer
self.content = []
- return rv + "\n" if rv else ""
+ self.footer = []
+ return "\n".join([rv] + footer) + "\n" if rv else ""
class LinkTag(_BaseTag):
- def __init__(self, tag, attrs):
+ def __init__(self, href, paragraph, tag, attrs):
super().__init__(tag, attrs)
- self.href = None
+ self.href = href
+ self.paragraph = paragraph
self.content = []
- for attr_name, attr_value in attrs:
- if attr_name.lower() == "href":
- self.href = urljoin(_build_url(), attr_value)
def on_data(self, data):
+ if not self.content:
+ self.paragraph.on_data(f"↓")
+ self.paragraph.on_data(data)
self.content.append(data.strip())
def flush(self):
- rv = ""
- if self.href:
- text = " ".join(" ".join(data.split()) for data in self.content if data)
- rv = f"=> {self.href} {text}"
- self.href = None
- self.content = []
- return rv
+ text = " ".join(" ".join(data.split()) for data in self.content if data)
+ self.paragraph.footer.append(f"=> {self.href} {text}")
+ return ""
class LitItemTag(ParagraphTag):
return f"```\n{rv}\n```\n" if rv else ""
-_TAGS_IN_TEXT = {
- "rb", "ruby", "rp", "rt",
- "b", "strong", "i", "em", "s",
- "mark", "small", "del", "ins", "sub", "sup", "u",
- "code", "kbd", "time", "wbr", "dfn", "tt", "big",
- "table", "thead", "tbody", "th", "tfoot", "td", "colgroup", "col", "caption",
-}
-
-
class HtmlToGmi(HTMLParser):
def __init__(self):
super().__init__()
elif tag in {"blockquote", "q"}:
_push(QuoteTag(tag, attrs))
elif tag == "a":
- _push(LinkTag(tag, attrs))
+ href = {attr_name: attr_value for attr_name, attr_value in attrs}.get("href")
+ if href:
+ self.stack.append(LinkTag(href, self._get_current_paragraph(), tag, attrs))
elif tag == "img":
img = {attr_name: attr_value for attr_name, attr_value in attrs}
title = img.get("title") or ""
if self.stack and tag == self.stack[-1].tag:
self.gmi_text.append(self.stack.pop().flush())
+ def _get_current_paragraph(self):
+ for elem in reversed(self.stack):
+ if isinstance(elem, ParagraphTag):
+ return elem
+ self.stack = [ParagraphTag("p", [])] + self.stack
+ return self.stack[0]
+
+
def _format_honk(honk):
assert honk["Format"] == "html", honk
assert honk["Noise"] == honk["HTML"], honk
blob - 82b65b146819d13416f37b7f0be522f51fceaeda
blob + faa530310b306953874fd22e5d94bf1a60f1cd22
--- tests.py
+++ tests.py
class TestHtmlToGmi(TestCase):
+ def test_links_in_paragraph(self):
+ html = '<p>head <a href="https://127.0.0.1">https link</a> <a href="gemini://127.0.0.1">gemini link</a> tail</p>'
+ self.assertEqual(HtmlToGmi().feed(html), """\
+head ↓ https link ↓ gemini link tail
+=> https://127.0.0.1 https link
+=> gemini://127.0.0.1 gemini link
+""")
+
def test_in_text_tag(self):
html = "<p><b>bold</b> text</p>"
self.assertEqual(HtmlToGmi().feed(html), "bold text\n")
-
def test_img_emu(self):
html = "aa <img class=\"emu\" title=\":blobcatgooglyshrug:\" src=\"/d/6ytBYw515CvqFJZ8N2.png\"> bb</p>"
self.assertEqual(HtmlToGmi().feed(html), "aa :blobcatgooglyshrug: bb\n")
Paragraph 1.1
""")
-
def test_html2gmi_pre(self):
html = """\
<pre>
```
""")
-
def test_html2gmi_description_list(self):
html = """
<dl>