commit ae087ef0d33ca1fd2aa0edb03a608bedee511501 from: Aleksey Ryndin date: Wed Sep 04 14:09:07 2024 UTC Fix: HTML tags processing commit - 6948627533cde1876cb54f4e99373e13f95e7da2 commit + ae087ef0d33ca1fd2aa0edb03a608bedee511501 blob - 3e2f86ce0336842ade823598deeb51f480a9d5ea blob + 3cebbf4fa27748f13d4dfb1c23180f9ffbb4dc48 --- lonk.py +++ lonk.py @@ -19,24 +19,6 @@ def _load_from(action, page, c=None): with urlopen(_build_url(path="api", query=urlencode(query)), timeout=15) as f: return json_loads(f.read().decode("utf8")) - -collected = {} - - -home = _load_from(action="gethonks", page="home") -for honk in reversed(home["honks"]): - convoy = honk["Convoy"] - if convoy in collected: - continue - if honk.get("RID"): - for honk_in_convoy in _load_from(action="gethonks", page="convoy", c=convoy)["honks"]: - if not honk_in_convoy.get("RID"): - collected[convoy] = honk_in_convoy - break - else: - collected[convoy] = honk - - class _BaseTag: def __init__(self, tag, attrs): self.tag = tag @@ -96,6 +78,37 @@ class QuoteTag(ParagraphTag): return f"> {content}" if content else "" +class HeaderTag(ParagraphTag): + def flush(self): + content = super().flush() + if not content: + return "" + return f"{'#' * int(self.tag[1:])} {content}" + + +class PreformattedTag(_BaseTag): + def __init__(self, tag, attrs): + super().__init__(tag, attrs) + self.content = "" + + def on_data(self, data): + self.content += data + + def flush(self): + rv = self.content + self.content = "" + return f"```\n{rv}\n```\n" if rv else "" + + +_TAGS_IN_TEXT = { + "rb", "ruby", "rp", "rt", + "b", "strong", "i", "em", "s", + "mark", "small", "del", "ins", "sub", "sup", "u", + "code", "kbd", "time", "wbr", "dfn", "tt", "big", + "table", "thead", "tbody", "th", "tfoot", "td", "colgroup", "col", "caption", +} + + class HtmlToGmi(HTMLParser): def __init__(self): super().__init__() @@ -111,33 +124,20 @@ class HtmlToGmi(HTMLParser): def handle_starttag(self, tag, attrs): # print(f" > {tag=}") - # TODO: # https://humungus.tedunangst.com/r/webs/v/tip/f/htfilter/html.go - # var permittedtags = map[string]bool{ - # "div": true, "hr": true, "h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true, - # "table": true, "thead": true, "tbody": true, "th": true, "tfoot": true, - # "tr": true, "td": true, "colgroup": true, "col": true, "caption": true, - # "p": true, "br": true, "pre": true, "code": true, "blockquote": true, "q": true, - # "kbd": true, "time": true, "wbr": true, "aside": true, - # "ruby": true, "rtc": true, "rb": true, "rt": true, - # "samp": true, "mark": true, "ins": true, "dfn": true, "cite": true, - # "abbr": true, "address": true, "details": true, "summary": true, - # "strong": true, "em": true, "b": true, "i": true, "s": true, "u": true, - # "sub": true, "sup": true, "del": true, "tt": true, "small": true, "big": true, - # "ol": true, "ul": true, "li": true, "dl": true, "dt": true, "dd": true, - - if tag in {"b", "strong", "i", "em", "mark", "small", "del", "ins", "sub", "sup", "u", "rb", "ruby", "rp", "rt"}: - # skip text formatting + if tag in _TAGS_IN_TEXT: return if self.stack: self.gmi_text.append(self.stack[-1].flush()) - if tag in {"p", "br", "ul"}: - pass - elif tag == "li": + if tag == "pre": + self.stack.append(PreformattedTag(tag, attrs)) + elif tag in {"h1", "h2", "h3", "h4", "h5", "h6"}: + self.stack.append(HeaderTag(tag, attrs)) + elif tag in {"li", "dt"}: self.stack.append(LitItemTag(tag, attrs)) - elif tag == "blockquote": + elif tag in {"blockquote", "q"}: self.stack.append(QuoteTag(tag, attrs)) elif tag == "a": self.stack.append(LinkTag(tag, attrs)) @@ -151,8 +151,6 @@ class HtmlToGmi(HTMLParser): src = urljoin(_build_url(), attr_value) if src: self.gmi_text.append(f"=> {src} {title}") - else: - raise NotImplementedError(tag, attrs) def handle_data(self, data): # print(f" . {data=}") @@ -182,19 +180,33 @@ def _format_honk(honk): return "\n".join(lines) -print("# 𝓗 onk\n") +if __name__ == '__main__': + collected = {} -line = f"=> {_build_url(path='atme')} @me" -if home["mecount"]: - line += f' ({home["mecount"]})' -print(line) + home = _load_from(action="gethonks", page="home") + for honk in reversed(home["honks"]): + convoy = honk["Convoy"] + if convoy in collected: + continue + if honk.get("RID"): + for honk_in_convoy in _load_from(action="gethonks", page="convoy", c=convoy)["honks"]: + if not honk_in_convoy.get("RID"): + collected[convoy] = honk_in_convoy + break + else: + collected[convoy] = honk + print("# 𝓗 onk\n") -line = f"=> {_build_url(path='chatter')} chatter" -if home["chatcount"]: - line += f' ({home["chatcount"]})' -print(line) + line = f"=> {_build_url(path='atme')} @me" + if home["mecount"]: + line += f' ({home["mecount"]})' + print(line) + line = f"=> {_build_url(path='chatter')} chatter" + if home["chatcount"]: + line += f' ({home["chatcount"]})' + print(line) -for honk in reversed(collected.values()): - print("\n") - print(_format_honk(honk)) + for honk in reversed(collected.values()): + print("\n") + print(_format_honk(honk))