commit ae087ef0d33ca1fd2aa0edb03a608bedee511501
from: Aleksey Ryndin <continue@to.any-key.press>
date: Wed Sep 04 14:09:07 2024 UTC

Fix: HTML tags processing

commit - 6948627533cde1876cb54f4e99373e13f95e7da2
commit + ae087ef0d33ca1fd2aa0edb03a608bedee511501
blob - 3e2f86ce0336842ade823598deeb51f480a9d5ea
blob + 3cebbf4fa27748f13d4dfb1c23180f9ffbb4dc48
--- lonk.py
+++ lonk.py
@@ -19,24 +19,6 @@ def _load_from(action, page, c=None):
     with urlopen(_build_url(path="api", query=urlencode(query)), timeout=15) as f:
         return json_loads(f.read().decode("utf8"))
 
-
-collected = {}
-
-
-home = _load_from(action="gethonks", page="home")
-for honk in reversed(home["honks"]):
-    convoy = honk["Convoy"]
-    if convoy in collected:
-        continue
-    if honk.get("RID"):
-        for honk_in_convoy in _load_from(action="gethonks", page="convoy", c=convoy)["honks"]:
-            if not honk_in_convoy.get("RID"):
-                collected[convoy] = honk_in_convoy
-                break
-    else:
-        collected[convoy] = honk
-
-
 class _BaseTag:
     def __init__(self, tag, attrs):
         self.tag = tag
@@ -96,6 +78,37 @@ class QuoteTag(ParagraphTag):
         return f"> {content}" if content else ""
 
 
+class HeaderTag(ParagraphTag):
+    # Renders an <h1>..<h6> element as a gemtext heading line.  Gemtext only
+    # defines "#", "##" and "###", so deeper HTML levels are clamped to "###".
+    def flush(self):
+        content = super().flush()
+        return f"{'#' * min(int(self.tag[1:]), 3)} {content}" if content else ""
+
+
+class PreformattedTag(_BaseTag):
+    def __init__(self, tag, attrs):
+        super().__init__(tag, attrs)
+        self.content = ""  # raw text collected since the last flush
+
+    def on_data(self, data):
+        self.content = self.content + data
+
+    def flush(self):
+        # Hand back the collected text inside a ``` fence and reset the buffer.
+        text, self.content = self.content, ""
+        return f"```\n{text}\n```\n" if text else ""
+
+
+_TAGS_IN_TEXT = {  # tags whose start tag is ignored by HtmlToGmi.handle_starttag
+    "b", "strong", "i", "em", "s", "u", "tt", "big", "small",
+    "mark", "del", "ins", "sub", "sup", "code", "kbd", "dfn", "time", "wbr",
+    "ruby", "rb", "rp", "rt",
+    "table", "thead", "tbody", "tfoot", "th", "td",
+    "colgroup", "col", "caption",
+}
+
+
 class HtmlToGmi(HTMLParser):
     def __init__(self):
         super().__init__()
@@ -111,33 +124,20 @@ class HtmlToGmi(HTMLParser):
     def handle_starttag(self, tag, attrs):
         # print(f" > {tag=}")
 
-        # TODO:
         # https://humungus.tedunangst.com/r/webs/v/tip/f/htfilter/html.go
-        # var permittedtags = map[string]bool{
-        # "div": true, "hr": true, "h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
-	    # "table": true, "thead": true, "tbody": true, "th": true, "tfoot": true,
-	    # "tr": true, "td": true, "colgroup": true, "col": true, "caption": true,
-	    # "p": true, "br": true, "pre": true, "code": true, "blockquote": true, "q": true,
-	    # "kbd": true, "time": true, "wbr": true, "aside": true,
-	    # "ruby": true, "rtc": true, "rb": true, "rt": true,
-	    # "samp": true, "mark": true, "ins": true, "dfn": true, "cite": true,
-	    # "abbr": true, "address": true, "details": true, "summary": true,
-	    # "strong": true, "em": true, "b": true, "i": true, "s": true, "u": true,
-	    # "sub": true, "sup": true, "del": true, "tt": true, "small": true, "big": true,
-	    # "ol": true, "ul": true, "li": true, "dl": true, "dt": true, "dd": true,
-
-        if tag in {"b", "strong", "i", "em", "mark", "small", "del", "ins", "sub", "sup", "u", "rb", "ruby", "rp", "rt"}:
-            # skip text formatting
+        if tag in _TAGS_IN_TEXT:
             return
 
         if self.stack:
             self.gmi_text.append(self.stack[-1].flush())
 
-        if tag in {"p", "br", "ul"}:
-            pass
-        elif tag == "li":
+        if tag == "pre":
+            self.stack.append(PreformattedTag(tag, attrs))
+        elif tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
+            self.stack.append(HeaderTag(tag, attrs))
+        elif tag in {"li", "dt"}:
             self.stack.append(LitItemTag(tag, attrs))
-        elif tag == "blockquote":
+        elif tag in {"blockquote", "q"}:
             self.stack.append(QuoteTag(tag, attrs))
         elif tag == "a":
             self.stack.append(LinkTag(tag, attrs))
@@ -151,8 +151,6 @@ class HtmlToGmi(HTMLParser):
                     src = urljoin(_build_url(), attr_value)
             if src:
                 self.gmi_text.append(f"=> {src} {title}")
-        else:
-            raise NotImplementedError(tag, attrs)
 
     def handle_data(self, data):
         # print(f" . {data=}")
@@ -182,19 +180,33 @@ def _format_honk(honk):
     return "\n".join(lines)
 
 
-print("# 𝓗 onk\n")
+if __name__ == '__main__':
+    collected = {}
 
-line = f"=> {_build_url(path='atme')} @me"
-if home["mecount"]:
-    line += f' ({home["mecount"]})'
-print(line)
+    home = _load_from(action="gethonks", page="home")
+    for honk in reversed(home["honks"]):
+        convoy = honk["Convoy"]
+        if convoy in collected:
+            continue
+        if honk.get("RID"):
+            for honk_in_convoy in _load_from(action="gethonks", page="convoy", c=convoy)["honks"]:
+                if not honk_in_convoy.get("RID"):
+                    collected[convoy] = honk_in_convoy
+                    break
+        else:
+            collected[convoy] = honk
+    print("# 𝓗 onk\n")
 
-line = f"=> {_build_url(path='chatter')} chatter"
-if home["chatcount"]:
-    line += f' ({home["chatcount"]})'
-print(line)
+    line = f"=> {_build_url(path='atme')} @me"
+    if home["mecount"]:
+        line += f' ({home["mecount"]})'
+    print(line)
 
+    line = f"=> {_build_url(path='chatter')} chatter"
+    if home["chatcount"]:
+        line += f' ({home["chatcount"]})'
+    print(line)
 
-for honk in reversed(collected.values()):
-    print("\n")
-    print(_format_honk(honk))
+    for honk in reversed(collected.values()):
+        print("\n")
+        print(_format_honk(honk))