Commit Diff


commit - 6e3768be317a2aaa642cdee5f694c0d2395e955a
commit + 1b9c150021811fd083143dcb0ca5af1a868e4abc
blob - b743d4785bf84fdf3bb5e5889150401f1081b1e9
blob + add410e2be395f7e62544825886e4c8678087dd4
--- lonk.py
+++ lonk.py
@@ -141,58 +141,46 @@ class HtmlToGmi(HTMLParser):
         # print(f" > {tag=}")
 
         # https://humungus.tedunangst.com/r/webs/v/tip/f/htfilter/html.go
-        if tag in _TAGS_IN_TEXT:
-            return
 
-        if tag == "img":
-            img = {attr_name: attr_value for attr_name, attr_value in attrs}
-            title = img.get("title")
-            if img.get("class") == "emu" and title and self.stack:
-                self.stack[-1].on_data(title)
-                return
-                
+        def _push(elem):
+            if self.stack:
+                self.gmi_text.append(self.stack[-1].flush())
+            self.stack.append(elem)
 
-        if self.stack:
-            self.gmi_text.append(self.stack[-1].flush())
-
-        if tag == "pre":
-            self.stack.append(PreformattedTag(tag, attrs))
+        if tag == "p":
+            _push(ParagraphTag(tag, attrs))
+        elif tag == "pre":
+            _push(PreformattedTag(tag, attrs))
         elif tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
-            self.stack.append(HeaderTag(tag, attrs))
+            _push(HeaderTag(tag, attrs))
         elif tag in {"li", "dt"}:
-            self.stack.append(LitItemTag(tag, attrs))
+            _push(LitItemTag(tag, attrs))
         elif tag in {"blockquote", "q"}:
-            self.stack.append(QuoteTag(tag, attrs))
+            _push(QuoteTag(tag, attrs))
         elif tag == "a":
-            self.stack.append(LinkTag(tag, attrs))
+            _push(LinkTag(tag, attrs))
         elif tag == "img":
-            title = "" 
-            src = ""
-            for attr_name, attr_value in attrs:
-                if attr_name.lower() == "title":
-                    title = attr_value
-                elif attr_name.lower() == "src":
-                    src = urljoin(_build_url(), attr_value)
-            if src:
-                self.gmi_text.append(f"=> {src} {title}")
+            img = {attr_name: attr_value for attr_name, attr_value in attrs}
+            title = img.get("title") or ""
+            if img.get("class") == "emu" and title and self.stack:
+                self.stack[-1].on_data(title)
+            else:
+                src = img.get("src")
+                if src:
+                    self.gmi_text.append(f"=> {src} {title}")
 
     def handle_data(self, data):
         # print(f" . {data=}")
         if not self.stack:
-            self.stack.append(ParagraphTag("tag", []))
+            self.stack.append(ParagraphTag("p", []))
         self.stack[-1].on_data(data)
 
     def handle_endtag(self, tag):
         # print(f" < {tag=}")
-        if tag in _TAGS_IN_TEXT:
-            return
+        if self.stack and tag == self.stack[-1].tag:
+            self.gmi_text.append(self.stack.pop().flush())
 
-        if self.stack:
-            self.gmi_text.append(self.stack[-1].flush())
-            if tag == self.stack[-1].tag:
-                self.stack.pop()
 
-
 def _format_honk(honk):
     assert honk["Format"] == "html", honk
     assert honk["Noise"] == honk["HTML"], honk