commit - 4aa3bfc3170cfbf70255585b2420a734c736fa35
commit + a7cabe9585c64f4697ea89eacaa92a1a417655da
blob - 4ba6f93c62fb3cf70232fbc0b215de58b8418205
blob + e36edd1f1880ae160310438f05d2eee1fbe51932
--- lonk.py
+++ lonk.py
from json import loads as json_loads
from pathlib import Path
-from urllib.parse import urlencode, urlunsplit
+from urllib.parse import urlencode, urlunsplit, urljoin
from urllib.request import urlopen
+from html.parser import HTMLParser
_TOKEN = (Path(__file__).parent / ".local" / "token").read_text(encoding="utf8")
collected[convoy] = honk
+class _BaseTag:
+    """Common interface of the per-tag text collectors.
+
+    Only the tag name is stored; ``attrs`` is accepted so that subclasses
+    share one constructor signature.  Subclasses must override ``on_data``
+    and ``flush``.
+    """
+
+    def __init__(self, tag, attrs):
+        # ``attrs`` is intentionally not kept by the base class.
+        self.tag = tag
+
+    def on_data(self, data):
+        """Receive a chunk of text; must be overridden."""
+        raise NotImplementedError
+
+    def flush(self):
+        """Return the pending output; must be overridden."""
+        raise NotImplementedError
+
+
+class ParagraphTag(_BaseTag):
+    """Collect the text of one block and emit it as a single gemtext line.
+
+    The text of a block can arrive in several ``on_data`` calls (the parser
+    reports a new chunk after every tag it encounters).  Chunks are kept
+    verbatim and whitespace is normalised only once, in ``flush``.
+    """
+
+    def __init__(self, tag, attrs):
+        super().__init__(tag, attrs)
+        self.content = []
+
+    def on_data(self, data):
+        """Store a raw chunk of text.
+
+        The chunk is not stripped: doing so would lose whether neighbouring
+        chunks were separated by whitespace, and ``foo<b>bar</b>!`` would
+        come out as ``foo bar !`` instead of ``foobar!``.
+        """
+        self.content.append(data)
+
+    def flush(self):
+        """Return the collected text plus a newline, or ``""`` if empty.
+
+        Chunks are concatenated, every run of whitespace collapses to one
+        space, and leading/trailing whitespace is dropped.  The buffer is
+        emptied, so an immediate second call returns ``""``.
+        """
+        text = " ".join("".join(self.content).split())
+        self.content = []
+        return f"{text}\n" if text else ""
+
+
+class LinkTag(_BaseTag):
+    """Collect an ``<a>`` element and emit it as a gemtext link line."""
+
+    def __init__(self, tag, attrs):
+        super().__init__(tag, attrs)
+        self.href = None
+        self.content = []
+        for attr_name, attr_value in attrs:
+            if attr_name.lower() == "href":
+                # Relative targets are resolved against the URL returned by
+                # _build_url().
+                self.href = urljoin(_build_url(), attr_value)
+
+    def on_data(self, data):
+        """Store a raw chunk of link text (whitespace is handled in flush)."""
+        self.content.append(data)
+
+    def flush(self):
+        """Return the pending link line and reset the collector.
+
+        Returns ``"=> <href> <text>"``, or ``"=> <href>"`` when there is no
+        text (no trailing space).  Without a target the text alone is
+        returned, so the words of an ``<a>`` lacking ``href`` are not lost.
+        Both the target and the text buffer are cleared.
+        """
+        # Concatenate first, collapse whitespace afterwards: joining the
+        # chunks with a space would insert blanks where inline markup
+        # split a word.
+        text = " ".join("".join(self.content).split())
+        href = self.href
+        self.href = None
+        self.content = []
+        if not href:
+            return text
+        return f"=> {href} {text}" if text else f"=> {href}"
+
+
+class LitItemTag(ParagraphTag):
+    """Paragraph collector whose output is marked as a list item."""
+
+    def flush(self):
+        """Return the flushed text prefixed with ``"* "``, or ``""`` if empty."""
+        line = super().flush()
+        if not line:
+            return ""
+        return "* " + line
+
+
+class QuoteTag(ParagraphTag):
+    """Paragraph collector whose output is marked as a quotation."""
+
+    def flush(self):
+        """Return the flushed text prefixed with ``"> "``, or ``""`` if empty."""
+        line = super().flush()
+        if not line:
+            return ""
+        return "> " + line
+
+
+class HtmlToGmi(HTMLParser):
+    """Convert an HTML fragment into gemtext.
+
+    ``stack`` holds the collectors of the currently open elements
+    (innermost last); finished lines accumulate in ``gmi_text``.
+    """
+
+    # Inline formatting has no gemtext counterpart.  Both the start and the
+    # end tag are ignored so the text stays inside the enclosing block.
+    _INLINE_TAGS = frozenset({
+        "b", "strong", "i", "em", "mark", "small", "del", "ins",
+        "sub", "sup", "u", "rb", "ruby", "rp", "rt",
+    })
+
+    def __init__(self):
+        super().__init__()
+        self.gmi_text = []
+        self.stack = []
+
+    def feed(self, html_text):
+        """Parse ``html_text`` and return everything converted so far.
+
+        Unlike ``HTMLParser.feed`` this returns a value: the non-empty
+        output lines joined with newlines.  Collectors still open at the
+        end of the input are flushed and discarded.
+        """
+        super().feed(html_text)
+        # The parser may hold back trailing text while it waits for more
+        # input (e.g. text ending in an unterminated "&..." sequence);
+        # close() forces that text through handle_data.
+        self.close()
+        while self.stack:
+            self.gmi_text.append(self.stack.pop().flush())
+        return "\n".join(line for line in self.gmi_text if line)
+
+    def handle_starttag(self, tag, attrs):
+        """Open a collector for ``tag`` or emit its line directly.
+
+        Raises:
+            NotImplementedError: for any tag that has no conversion yet.
+        """
+        if tag in self._INLINE_TAGS:
+            return
+
+        # Every other tag starts new output, so emit what the innermost
+        # collector has gathered up to this point.
+        if self.stack:
+            self.gmi_text.append(self.stack[-1].flush())
+
+        if tag in {"p", "br", "ul"}:
+            # Pure separators: the flush above is all they need.
+            pass
+        elif tag == "li":
+            self.stack.append(LitItemTag(tag, attrs))
+        elif tag == "blockquote":
+            self.stack.append(QuoteTag(tag, attrs))
+        elif tag == "a":
+            self.stack.append(LinkTag(tag, attrs))
+        elif tag == "img":
+            title = ""
+            src = ""
+            for attr_name, attr_value in attrs:
+                if attr_name.lower() == "title":
+                    title = attr_value
+                elif attr_name.lower() == "src":
+                    src = urljoin(_build_url(), attr_value)
+            if src:
+                self.gmi_text.append(f"=> {src} {title}")
+        else:
+            # NOTE(review): an unknown tag aborts the whole conversion;
+            # kept as is because callers may rely on the exception.
+            raise NotImplementedError(tag, attrs)
+
+    def handle_data(self, data):
+        """Route text to the innermost collector.
+
+        Text outside any collector gets an implicit paragraph.  Its
+        placeholder tag name "tag" never matches an end tag, so it stays on
+        the stack until ``feed`` drains it.
+        """
+        if not self.stack:
+            self.stack.append(ParagraphTag("tag", []))
+        self.stack[-1].on_data(data)
+
+    def handle_endtag(self, tag):
+        """Flush the innermost collector and pop it when ``tag`` closes it."""
+        # Inline end tags are skipped like their start tags; flushing here
+        # would cut the surrounding paragraph in two at every </b>, </i>, ...
+        if tag in self._INLINE_TAGS or not self.stack:
+            return
+        top = self.stack[-1]
+        self.gmi_text.append(top.flush())
+        if tag == top.tag:
+            self.stack.pop()
+
+
def _format_honk(honk):
assert honk["Format"] == "html", honk
assert honk["Noise"] == honk["HTML"], honk
firts_line = f'## From: {honk.get("Oondle") or honk["Handle"]}, {honk["Date"]}'
- lines = [firts_line, f'=> {honk["XID"]}', honk["Noise"]]
+ lines = [firts_line, f'=> {honk["XID"]}', HtmlToGmi().feed(honk["Noise"])]
for donk in (honk.get("Donks") or []):
lines.append(f'=> {donk["URL"]}')