commit - 1be7bb9ae79e1ec8a1628c35510990d3380004ae
commit + 1f653a9870bcdabc5ca4dbbb3041ec60b4687b7a
blob - 521ba840c59574f7e97590a04a0de2e0c35bf400
blob + 23c3b5ca275d13889f071c1d91d57e8c7d8237bc
--- lonk.py
+++ lonk.py
def on_data(self, data):
if not self.content:
- self.paragraph.on_data(f"↓")
+ self.paragraph.on_data("↳")
self.paragraph.on_data(data)
self.content.append(data.strip())
class HtmlToGmi(HTMLParser):
- def __init__(self):
+ def __init__(self, base_url):
super().__init__()
self.gmi_text = []
self.stack = []
+ self.base_url = base_url
def feed(self, html_text):
super().feed(html_text)
elif tag == "a":
href = {attr_name: attr_value for attr_name, attr_value in attrs}.get("href")
if href:
- self.stack.append(LinkTag(href, self._get_current_paragraph(), tag, attrs))
+ self.stack.append(LinkTag(urljoin(self.base_url, href), self._get_current_paragraph(), tag, attrs))
elif tag == "img":
img = {attr_name: attr_value for attr_name, attr_value in attrs}
title = img.get("title") or ""
else:
src = img.get("src")
if src:
- self.gmi_text.append(f"=> {src} {title}")
+ self.gmi_text.append(f"=> {urljoin(self.base_url, src)} {title}")
def handle_data(self, data):
# print(f" . {data=}")
assert honk["Format"] == "html", honk
assert honk["Noise"] == honk["HTML"], honk
firts_line = f'## From: {honk.get("Oondle") or honk["Handle"]}, {honk["Date"]}'
- lines = [firts_line, f'=> {honk["XID"]}', HtmlToGmi().feed(honk["Noise"])]
+ lines = [firts_line, f'=> {honk["XID"]}', HtmlToGmi(_build_url()).feed(honk["Noise"])]
for donk in (honk.get("Donks") or []):
lines.append(f'=> {donk["URL"]}')
blob - faa530310b306953874fd22e5d94bf1a60f1cd22
blob + 645ae6bd91a6c1d866d6cfd9f1b19c544531bca7
--- tests.py
+++ tests.py
class TestHtmlToGmi(TestCase):
+ def test_img_realtive(self):
+ html = '<img src="/d/xxxx.jpg" title="yyy">'
+ self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), "=> https://localhost/d/xxxx.jpg yyy")
+
+ def test_link_realtive(self):
+ html = '<p>head <a href="/sub1/1">https link</a> tail</p>'
+ self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), """\
+head ↳ https link tail
+=> https://localhost/sub1/1 https link
+""")
+
def test_links_in_paragraph(self):
html = '<p>head <a href="https://127.0.0.1">https link</a> <a href="gemini://127.0.0.1">gemini link</a> tail</p>'
- self.assertEqual(HtmlToGmi().feed(html), """\
-head ↓ https link ↓ gemini link tail
+ self.assertEqual(HtmlToGmi("").feed(html), """\
+head ↳ https link ↳ gemini link tail
=> https://127.0.0.1 https link
=> gemini://127.0.0.1 gemini link
""")
def test_in_text_tag(self):
html = "<p><b>bold</b> text</p>"
- self.assertEqual(HtmlToGmi().feed(html), "bold text\n")
+ self.assertEqual(HtmlToGmi("").feed(html), "bold text\n")
def test_img_emu(self):
html = "aa <img class=\"emu\" title=\":blobcatgooglyshrug:\" src=\"/d/6ytBYw515CvqFJZ8N2.png\"> bb</p>"
- self.assertEqual(HtmlToGmi().feed(html), "aa :blobcatgooglyshrug: bb\n")
+ self.assertEqual(HtmlToGmi("").feed(html), "aa :blobcatgooglyshrug: bb\n")
def test_html2gmi_header(self):
html = """\
<h2>Header 1.1</h2>
<p>Paragraph 1.1</p>
"""
- self.assertEqual(HtmlToGmi().feed(html), """\
+ self.assertEqual(HtmlToGmi("").feed(html), """\
# Header 1
Paragraph 1
return fib(n - 1) + fib(n - 1)
</pre>
"""
- self.assertEqual(HtmlToGmi().feed(html), """\
+ self.assertEqual(HtmlToGmi("").feed(html), """\
```
def fib(n):
<dd>White cold drink</dd>
</dl>
"""
- self.assertEqual(HtmlToGmi().feed(html), """\
+ self.assertEqual(HtmlToGmi("").feed(html), """\
* Coffee
Black hot drink