commit 4e7950e1115bb33752f83affbb29a755bff439bc from: Aleksey Ryndin date: Mon Sep 30 16:56:19 2024 UTC Fix: lonk page algorithm commit - b0c5ad6847e614fab6be94f3cdaacefa1eda5769 commit + 4e7950e1115bb33752f83affbb29a755bff439bc blob - f9eb0771572b3583a7b115458bae5b491a4e8331 blob + 590430b8eccd03bcfd3f3c2d834c5b4e9c6b9607 --- lonk.py +++ lonk.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -from datetime import datetime from json import loads as json_loads from mimetypes import guess_type from os import environ @@ -129,11 +128,11 @@ class HtmlToGmi(HTMLParser): elif tag in {"blockquote", "q"}: _push(QuoteTag(tag, attrs)) elif tag == "a": - href = {attr_name: attr_value for attr_name, attr_value in attrs}.get("href") + href = dict(attrs).get("href") if href: self.stack.append(LinkTag(urljoin(self.base_url, href), self._get_current_paragraph(), tag, attrs)) elif tag == "img": - img = {attr_name: attr_value for attr_name, attr_value in attrs} + img = dict(attrs) title = img.get("title") or "" if img.get("class") == "emu" and title and self.stack: self.stack[-1].on_data(title) @@ -231,14 +230,14 @@ class HonkUrl: def _proxy_url_factory(proxy_media_enabled, lonk_url): def _get_proxy_url(mime, url): return ( - lonk_url.build("proxy", urlencode({"m": mime, "u": url})) + lonk_url.build("proxy", urlencode({"m": mime, "u": url})) if mime and proxy_media_enabled else url ) return _get_proxy_url -def _create_schema(db_con, cert_hash): +def _create_schema(db_con): db_con.execute( """ CREATE TABLE @@ -282,104 +281,126 @@ def _create_schema(db_con, cert_hash): """ ) - # ========================================================================== - # TODO: test code (cert_hash must be removed, cascade) - with open(".local/settings.json") as f: - settings = json_loads(f.read()) - db_con.execute( - "INSERT INTO client (cert_hash, honk_url, token) VALUES (?, ?, ?)", - (cert_hash, settings["server"], settings["token"]) - ) - # ========================================================================== - -def db_connect(cert_hash): +def db_connect(): db_file_path = Path(__file__).parent / ".local" / "db" db_file_path.parent.mkdir(parents=True, exist_ok=True) db_exist = db_file_path.exists() db_con = sqlite3_connect(db_file_path) if not db_exist: with db_con: - _create_schema(db_con, cert_hash) + _create_schema(db_con) return db_con -def _lonk_impl(db_con, client_id, lonk_url, honk_url): - row = db_con.execute( - "SELECT MAX(honk_id) FROM convoy WHERE client_id=?", - (client_id, ) - ).fetchone() - after, = row +class _Collected: + def __init__(self, convoy_id, convoy, honk_id, honker, url, html, date): + self.convoy_id = convoy_id + self.convoy = convoy + self.honk_id = honk_id + self.honker = honker + self.url = url + self.html = html + self.date = date + self._donks = None - fn_media_url = _proxy_url_factory(environ.get("LONK_PROXY_MEDIA"), lonk_url) + def add_donk(self, donk_url, mime, alt_text): + if self._donks is None: + self._donks = [(donk_url, mime, alt_text)] + else: + self._donks.append((donk_url, mime, alt_text)) - home = honk_url.do_get(action="gethonks", page="home", after=after) + def end_of_donks(self): + if self._donks is None: + self._donks = [] + + def iterate_donks(self, db_con): + if self._donks is not None: + yield from self._donks + return + + res_donks = db_con.execute( + "SELECT url, mime, alt_text FROM donk WHERE convoy_id=?", + (self.convoy_id, ) + ) + while True: + donks = res_donks.fetchmany() + if not donks: + return + + yield from donks + + +def _lonk_impl(db_con, client_id, lonk_url, honk_url): + fn_media_url = _proxy_url_factory(environ.get("LONK_PROXY_MEDIA"), lonk_url) + home = honk_url.do_get(action="gethonks", page="home") + lonk_page = {} for honk in reversed(home.get("honks") or []): convoy = honk["Convoy"] + if convoy in lonk_page: + continue + row = db_con.execute( - "SELECT convoy_id FROM convoy WHERE client_id=? AND convoy=?", + "SELECT convoy_id, convoy, honk_id, honker, url, html, date FROM convoy WHERE client_id=? AND convoy=?", (client_id, convoy) ).fetchone() if row: + lonk_page[convoy] = _Collected(*row) continue - def _save_convoy(honker, honk): + def _save_convoy(convoy, honker, honk): row = db_con.execute( """ INSERT INTO convoy(convoy, client_id, honk_id, honker, url, html, date) VALUES (?, ?, ?, ?, ?, ?, ?) - RETURNING convoy_id + RETURNING + convoy_id """, (convoy, client_id, honk["ID"], honker, honk["XID"], honk["HTML"], honk["Date"]) ).fetchone() convoy_id, = row + lonk_page[convoy] = _Collected( + convoy_id, convoy, honk["ID"], honker, honk["XID"], honk["HTML"], honk["Date"] + ) for donk in (honk.get("Donks") or []): donk_url = honk_url.build_url(path=f'/d/{donk["XID"]}') if donk.get("XID") else donk["URL"] + mime, alt_text = donk["Media"], donk.get("Desc") or donk.get("Name") or None db_con.execute( "INSERT INTO donk (convoy_id, url, mime, alt_text) VALUES (?, ?, ?, ?)", - (convoy_id, donk_url, donk["Media"], donk.get("Desc") or donk.get("Name") or None) + (lonk_page[convoy].convoy_id, donk_url, mime, alt_text, ) ) + lonk_page[convoy].add_donk(donk_url, mime, alt_text) + lonk_page[convoy].end_of_donks() if honk.get("RID"): for honk_in_convoy in honk_url.do_get(action="gethonks", page="convoy", c=convoy)["honks"]: if not honk_in_convoy.get("RID"): author = honk_in_convoy.get("Oondle") or honk_in_convoy["Handle"] honker = f'{author} (🧵 {honk["Handle"]})' - _save_convoy(honker, honk_in_convoy) + _save_convoy(convoy, honker, honk_in_convoy) break else: - db_con.execute( + row = db_con.execute( """ INSERT INTO convoy(convoy, client_id, honk_id) VALUES (?, ?, ?) + RETURNING + convoy_id """, (convoy, client_id, honk["ID"]) - ) + ).fetchone() + convoy_id, = row + lonk_page[convoy] = _Collected(convoy_id, convoy, None, None, None, None, None) else: oondle = honk.get("Oondle") honker = f'{oondle} (🔁 {honk["Handle"]})' if oondle else f'{honk["Handle"]}' - _save_convoy(honker, honk) + _save_convoy(convoy, honker, honk) - res = db_con.execute( - """ - SELECT - convoy_id, honker, url, html, date - FROM - convoy - WHERE - client_id=? AND honker IS NOT NULL - ORDER BY - convoy_id DESC - LIMIT 256 - """, - (client_id, ) - ) - print("20 text/gemini\r") print("# 𝓗 onk\r") print("\r") @@ -395,37 +416,26 @@ def _lonk_impl(db_con, client_id, lonk_url, honk_url): print(line + "\r") print("\r") - while True: - rows = res.fetchmany() - if not rows: - break + for collected in reversed(lonk_page.values()): + if collected.honker is None: + continue + lines = [ + f"## From {collected.honker} {collected.date}", + f"=> {collected.url}", + HtmlToGmi(honk_url.build_url(), fn_media_url).feed(collected.html) + ] - for row in rows: - convoy_id, honker, url, html, date, = row - lines = [ - f"## From {honker} {date}", - f"=> {url}", - HtmlToGmi(honk_url.build_url(), fn_media_url).feed(html) - ] - res_donks = db_con.execute( - "SELECT url, mime, alt_text FROM donk WHERE convoy_id=?", - (convoy_id, ) - ) - while True: - donks = res_donks.fetchmany() - if not donks: - break - for donk in donks: - donk_url, donk_mime, donk_text, = donk - lines.append(f'=> {fn_media_url(donk_mime, donk_url)}') - if donk_text: - lines.append(donk_text) - print("\r\n".join(lines)) - print("\r") + for donk_url, donk_mime, donk_text in collected.iterate_donks(db_con): + lines.append(f'=> {fn_media_url(donk_mime, donk_url)}') + if donk_text: + lines.append(donk_text) + print("\r\n".join(lines)) + print("\r") + def lonk(cert_hash, lonk_url): - db_con = db_connect(cert_hash) + db_con = db_connect() row = db_con.execute("SELECT client_id, honk_url, token FROM client WHERE cert_hash=?", (cert_hash, )).fetchone() if not row: print(f'30 {lonk_url.build("ask_server")}\r') @@ -440,8 +450,6 @@ def new_client_stage_1_ask_server(lonk_url): print("10 Honk server URL\r") return splitted = urlsplit(unquote(lonk_url.query)) - honk_url = urlunsplit((splitted.scheme, splitted.netloc, "", "", "")) - path = [quote(urlunsplit((splitted.scheme, splitted.netloc, "", "", "")), safe=""), "ask_username"] print(f'30 {lonk_url.build(path)}\r') @@ -474,7 +482,7 @@ def new_client_stage_3_ask_password(cert_hash, lonk_ur } with urlopen(honk_url + "/dologin", data=urlencode(post_data).encode(), timeout=15) as f: token = f.read().decode("utf8") - db_con = db_connect(cert_hash) + db_con = db_connect() with db_con: db_con.execute( "INSERT INTO client (cert_hash, honk_url, token) VALUES (?, ?, ?)", @@ -514,15 +522,15 @@ def vgi(cert_hash, raw_url): if __name__ == '__main__': - cert_hash = environ.get("VGI_CERT_HASH") - if cert_hash: + cert_hash_ = environ.get("VGI_CERT_HASH") + if cert_hash_: try: start_time = clock_gettime(CLOCK_MONOTONIC) try: - raw_url = input().strip() - vgi(cert_hash, raw_url) + input_url = input().strip() + vgi(cert_hash_, input_url) finally: - stderr.write(f"{cert_hash}|{raw_url}|{clock_gettime(CLOCK_MONOTONIC) - start_time:.3f}sec.\n") + stderr.write(f"{cert_hash_}|{input_url}|{clock_gettime(CLOCK_MONOTONIC) - start_time:.3f}sec.\n") except HTTPError as error: print(f"43 Remote server return {error.code}: {error.reason}\r") except URLError as error: blob - b4f00241cdc9d1d15900206ebe3aa4860a9f2f8f blob + ab4b022bf5c7162b225518866f3eae5abeb9a94a --- tests.py +++ tests.py @@ -2,10 +2,14 @@ from unittest import TestCase, main from lonk import HtmlToGmi +def fn_media_url(mime, img_url): + return img_url + + class TestHtmlToGmi(TestCase): def test_br(self): html = '

head
tail

' - self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), """\ + self.assertEqual(HtmlToGmi("https://localhost/api", fn_media_url).feed(html), """\ head tail @@ -13,18 +17,18 @@ tail def test_img_realtive(self): html = '' - self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), "=> https://localhost/d/xxxx.jpg yyy") + self.assertEqual(HtmlToGmi("https://localhost/api", fn_media_url).feed(html), "=> https://localhost/d/xxxx.jpg yyy") def test_link_realtive(self): html = '

head https link tail

' - self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), """\ + self.assertEqual(HtmlToGmi("https://localhost/api", fn_media_url).feed(html), """\ head ↳ https link tail => https://localhost/sub1/1 https link """) def test_links_in_paragraph(self): html = '

head https link gemini link tail

' - self.assertEqual(HtmlToGmi("").feed(html), """\ + self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), """\ head ↳ https link ↳ gemini link tail => https://127.0.0.1 https link => gemini://127.0.0.1 gemini link @@ -32,11 +36,11 @@ head ↳ https link ↳ gemini link tail def test_in_text_tag(self): html = "

bold text

" - self.assertEqual(HtmlToGmi("").feed(html), "bold text\n") + self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), "bold text\n") def test_img_emu(self): html = "aa bb

" - self.assertEqual(HtmlToGmi("").feed(html), "aa :blobcatgooglyshrug: bb\n") + self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), "aa :blobcatgooglyshrug: bb\n") def test_html2gmi_header(self): html = """\ @@ -45,7 +49,7 @@ head ↳ https link ↳ gemini link tail

Header 1.1

Paragraph 1.1

""" - self.assertEqual(HtmlToGmi("").feed(html), """\ + self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), """\ # Header 1 Paragraph 1 @@ -64,7 +68,7 @@ def fib(n): return fib(n - 1) + fib(n - 1) """ - self.assertEqual(HtmlToGmi("").feed(html), """\ + self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), """\ ``` def fib(n): @@ -84,7 +88,7 @@ def fib(n):
White cold drink
""" - self.assertEqual(HtmlToGmi("").feed(html), """\ + self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), """\ * Coffee Black hot drink