commit 4e7950e1115bb33752f83affbb29a755bff439bc
from: Aleksey Ryndin
date: Mon Sep 30 16:56:19 2024 UTC
Fix: lonk page algorithm
commit - b0c5ad6847e614fab6be94f3cdaacefa1eda5769
commit + 4e7950e1115bb33752f83affbb29a755bff439bc
blob - f9eb0771572b3583a7b115458bae5b491a4e8331
blob + 590430b8eccd03bcfd3f3c2d834c5b4e9c6b9607
--- lonk.py
+++ lonk.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
-from datetime import datetime
from json import loads as json_loads
from mimetypes import guess_type
from os import environ
@@ -129,11 +128,11 @@ class HtmlToGmi(HTMLParser):
elif tag in {"blockquote", "q"}:
_push(QuoteTag(tag, attrs))
elif tag == "a":
- href = {attr_name: attr_value for attr_name, attr_value in attrs}.get("href")
+ href = dict(attrs).get("href")
if href:
self.stack.append(LinkTag(urljoin(self.base_url, href), self._get_current_paragraph(), tag, attrs))
elif tag == "img":
- img = {attr_name: attr_value for attr_name, attr_value in attrs}
+ img = dict(attrs)
title = img.get("title") or ""
if img.get("class") == "emu" and title and self.stack:
self.stack[-1].on_data(title)
@@ -231,14 +230,14 @@ class HonkUrl:
def _proxy_url_factory(proxy_media_enabled, lonk_url):
def _get_proxy_url(mime, url):
return (
- lonk_url.build("proxy", urlencode({"m": mime, "u": url}))
+ lonk_url.build("proxy", urlencode({"m": mime, "u": url}))
if mime and proxy_media_enabled else
url
)
return _get_proxy_url
-def _create_schema(db_con, cert_hash):
+def _create_schema(db_con):
db_con.execute(
"""
CREATE TABLE
@@ -282,104 +281,126 @@ def _create_schema(db_con, cert_hash):
"""
)
- # ==========================================================================
- # TODO: test code (cert_hash must be removed, cascade)
- with open(".local/settings.json") as f:
- settings = json_loads(f.read())
- db_con.execute(
- "INSERT INTO client (cert_hash, honk_url, token) VALUES (?, ?, ?)",
- (cert_hash, settings["server"], settings["token"])
- )
- # ==========================================================================
-
-def db_connect(cert_hash):
+def db_connect():
db_file_path = Path(__file__).parent / ".local" / "db"
db_file_path.parent.mkdir(parents=True, exist_ok=True)
db_exist = db_file_path.exists()
db_con = sqlite3_connect(db_file_path)
if not db_exist:
with db_con:
- _create_schema(db_con, cert_hash)
+ _create_schema(db_con)
return db_con
-def _lonk_impl(db_con, client_id, lonk_url, honk_url):
- row = db_con.execute(
- "SELECT MAX(honk_id) FROM convoy WHERE client_id=?",
- (client_id, )
- ).fetchone()
- after, = row
+class _Collected:
+ def __init__(self, convoy_id, convoy, honk_id, honker, url, html, date):
+ self.convoy_id = convoy_id
+ self.convoy = convoy
+ self.honk_id = honk_id
+ self.honker = honker
+ self.url = url
+ self.html = html
+ self.date = date
+ self._donks = None
- fn_media_url = _proxy_url_factory(environ.get("LONK_PROXY_MEDIA"), lonk_url)
+ def add_donk(self, donk_url, mime, alt_text):
+ if self._donks is None:
+ self._donks = [(donk_url, mime, alt_text)]
+ else:
+ self._donks.append((donk_url, mime, alt_text))
- home = honk_url.do_get(action="gethonks", page="home", after=after)
+ def end_of_donks(self):
+ if self._donks is None:
+ self._donks = []
+
+ def iterate_donks(self, db_con):
+ if self._donks is not None:
+ yield from self._donks
+ return
+
+ res_donks = db_con.execute(
+ "SELECT url, mime, alt_text FROM donk WHERE convoy_id=?",
+ (self.convoy_id, )
+ )
+ while True:
+ donks = res_donks.fetchmany()
+ if not donks:
+ return
+
+ yield from donks
+
+
+def _lonk_impl(db_con, client_id, lonk_url, honk_url):
+ fn_media_url = _proxy_url_factory(environ.get("LONK_PROXY_MEDIA"), lonk_url)
+ home = honk_url.do_get(action="gethonks", page="home")
+ lonk_page = {}
for honk in reversed(home.get("honks") or []):
convoy = honk["Convoy"]
+ if convoy in lonk_page:
+ continue
+
row = db_con.execute(
- "SELECT convoy_id FROM convoy WHERE client_id=? AND convoy=?",
+ "SELECT convoy_id, convoy, honk_id, honker, url, html, date FROM convoy WHERE client_id=? AND convoy=?",
(client_id, convoy)
).fetchone()
if row:
+ lonk_page[convoy] = _Collected(*row)
continue
- def _save_convoy(honker, honk):
+ def _save_convoy(convoy, honker, honk):
row = db_con.execute(
"""
INSERT INTO
convoy(convoy, client_id, honk_id, honker, url, html, date)
VALUES
(?, ?, ?, ?, ?, ?, ?)
- RETURNING convoy_id
+ RETURNING
+ convoy_id
""",
(convoy, client_id, honk["ID"], honker, honk["XID"], honk["HTML"], honk["Date"])
).fetchone()
convoy_id, = row
+ lonk_page[convoy] = _Collected(
+ convoy_id, convoy, honk["ID"], honker, honk["XID"], honk["HTML"], honk["Date"]
+ )
for donk in (honk.get("Donks") or []):
donk_url = honk_url.build_url(path=f'/d/{donk["XID"]}') if donk.get("XID") else donk["URL"]
+ mime, alt_text = donk["Media"], donk.get("Desc") or donk.get("Name") or None
db_con.execute(
"INSERT INTO donk (convoy_id, url, mime, alt_text) VALUES (?, ?, ?, ?)",
- (convoy_id, donk_url, donk["Media"], donk.get("Desc") or donk.get("Name") or None)
+ (lonk_page[convoy].convoy_id, donk_url, mime, alt_text, )
)
+ lonk_page[convoy].add_donk(donk_url, mime, alt_text)
+ lonk_page[convoy].end_of_donks()
if honk.get("RID"):
for honk_in_convoy in honk_url.do_get(action="gethonks", page="convoy", c=convoy)["honks"]:
if not honk_in_convoy.get("RID"):
author = honk_in_convoy.get("Oondle") or honk_in_convoy["Handle"]
honker = f'{author} (🧵 {honk["Handle"]})'
- _save_convoy(honker, honk_in_convoy)
+ _save_convoy(convoy, honker, honk_in_convoy)
break
else:
- db_con.execute(
+ row = db_con.execute(
"""
INSERT INTO
convoy(convoy, client_id, honk_id)
VALUES
(?, ?, ?)
+ RETURNING
+ convoy_id
""",
(convoy, client_id, honk["ID"])
- )
+ ).fetchone()
+ convoy_id, = row
+ lonk_page[convoy] = _Collected(convoy_id, convoy, None, None, None, None, None)
else:
oondle = honk.get("Oondle")
honker = f'{oondle} (🔁 {honk["Handle"]})' if oondle else f'{honk["Handle"]}'
- _save_convoy(honker, honk)
+ _save_convoy(convoy, honker, honk)
- res = db_con.execute(
- """
- SELECT
- convoy_id, honker, url, html, date
- FROM
- convoy
- WHERE
- client_id=? AND honker IS NOT NULL
- ORDER BY
- convoy_id DESC
- LIMIT 256
- """,
- (client_id, )
- )
-
print("20 text/gemini\r")
print("# 𝓗 onk\r")
print("\r")
@@ -395,37 +416,26 @@ def _lonk_impl(db_con, client_id, lonk_url, honk_url):
print(line + "\r")
print("\r")
- while True:
- rows = res.fetchmany()
- if not rows:
- break
+ for collected in reversed(lonk_page.values()):
+ if collected.honker is None:
+ continue
+ lines = [
+ f"## From {collected.honker} {collected.date}",
+ f"=> {collected.url}",
+ HtmlToGmi(honk_url.build_url(), fn_media_url).feed(collected.html)
+ ]
- for row in rows:
- convoy_id, honker, url, html, date, = row
- lines = [
- f"## From {honker} {date}",
- f"=> {url}",
- HtmlToGmi(honk_url.build_url(), fn_media_url).feed(html)
- ]
- res_donks = db_con.execute(
- "SELECT url, mime, alt_text FROM donk WHERE convoy_id=?",
- (convoy_id, )
- )
- while True:
- donks = res_donks.fetchmany()
- if not donks:
- break
- for donk in donks:
- donk_url, donk_mime, donk_text, = donk
- lines.append(f'=> {fn_media_url(donk_mime, donk_url)}')
- if donk_text:
- lines.append(donk_text)
- print("\r\n".join(lines))
- print("\r")
+ for donk_url, donk_mime, donk_text in collected.iterate_donks(db_con):
+ lines.append(f'=> {fn_media_url(donk_mime, donk_url)}')
+ if donk_text:
+ lines.append(donk_text)
+ print("\r\n".join(lines))
+ print("\r")
+
def lonk(cert_hash, lonk_url):
- db_con = db_connect(cert_hash)
+ db_con = db_connect()
row = db_con.execute("SELECT client_id, honk_url, token FROM client WHERE cert_hash=?", (cert_hash, )).fetchone()
if not row:
print(f'30 {lonk_url.build("ask_server")}\r')
@@ -440,8 +450,6 @@ def new_client_stage_1_ask_server(lonk_url):
print("10 Honk server URL\r")
return
splitted = urlsplit(unquote(lonk_url.query))
- honk_url = urlunsplit((splitted.scheme, splitted.netloc, "", "", ""))
-
path = [quote(urlunsplit((splitted.scheme, splitted.netloc, "", "", "")), safe=""), "ask_username"]
print(f'30 {lonk_url.build(path)}\r')
@@ -474,7 +482,7 @@ def new_client_stage_3_ask_password(cert_hash, lonk_ur
}
with urlopen(honk_url + "/dologin", data=urlencode(post_data).encode(), timeout=15) as f:
token = f.read().decode("utf8")
- db_con = db_connect(cert_hash)
+ db_con = db_connect()
with db_con:
db_con.execute(
"INSERT INTO client (cert_hash, honk_url, token) VALUES (?, ?, ?)",
@@ -514,15 +522,15 @@ def vgi(cert_hash, raw_url):
if __name__ == '__main__':
- cert_hash = environ.get("VGI_CERT_HASH")
- if cert_hash:
+ cert_hash_ = environ.get("VGI_CERT_HASH")
+ if cert_hash_:
try:
start_time = clock_gettime(CLOCK_MONOTONIC)
try:
- raw_url = input().strip()
- vgi(cert_hash, raw_url)
+ input_url = input().strip()
+ vgi(cert_hash_, input_url)
finally:
- stderr.write(f"{cert_hash}|{raw_url}|{clock_gettime(CLOCK_MONOTONIC) - start_time:.3f}sec.\n")
+ stderr.write(f"{cert_hash_}|{input_url}|{clock_gettime(CLOCK_MONOTONIC) - start_time:.3f}sec.\n")
except HTTPError as error:
print(f"43 Remote server return {error.code}: {error.reason}\r")
except URLError as error:
blob - b4f00241cdc9d1d15900206ebe3aa4860a9f2f8f
blob + ab4b022bf5c7162b225518866f3eae5abeb9a94a
--- tests.py
+++ tests.py
@@ -2,10 +2,14 @@ from unittest import TestCase, main
from lonk import HtmlToGmi
+def fn_media_url(mime, img_url):
+ return img_url
+
+
class TestHtmlToGmi(TestCase):
def test_br(self):
html = 'head
tail
'
- self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), """\
+ self.assertEqual(HtmlToGmi("https://localhost/api", fn_media_url).feed(html), """\
head
tail
@@ -13,18 +17,18 @@ tail
def test_img_realtive(self):
html = ''
- self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), "=> https://localhost/d/xxxx.jpg yyy")
+ self.assertEqual(HtmlToGmi("https://localhost/api", fn_media_url).feed(html), "=> https://localhost/d/xxxx.jpg yyy")
def test_link_realtive(self):
html = 'head https link tail
'
- self.assertEqual(HtmlToGmi("https://localhost/api").feed(html), """\
+ self.assertEqual(HtmlToGmi("https://localhost/api", fn_media_url).feed(html), """\
head ↳ https link tail
=> https://localhost/sub1/1 https link
""")
def test_links_in_paragraph(self):
html = 'head https link gemini link tail
'
- self.assertEqual(HtmlToGmi("").feed(html), """\
+ self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), """\
head ↳ https link ↳ gemini link tail
=> https://127.0.0.1 https link
=> gemini://127.0.0.1 gemini link
@@ -32,11 +36,11 @@ head ↳ https link ↳ gemini link tail
def test_in_text_tag(self):
html = "bold text
"
- self.assertEqual(HtmlToGmi("").feed(html), "bold text\n")
+ self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), "bold text\n")
def test_img_emu(self):
html = "aa bb
"
- self.assertEqual(HtmlToGmi("").feed(html), "aa :blobcatgooglyshrug: bb\n")
+ self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), "aa :blobcatgooglyshrug: bb\n")
def test_html2gmi_header(self):
html = """\
@@ -45,7 +49,7 @@ head ↳ https link ↳ gemini link tail
Header 1.1
Paragraph 1.1
"""
- self.assertEqual(HtmlToGmi("").feed(html), """\
+ self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), """\
# Header 1
Paragraph 1
@@ -64,7 +68,7 @@ def fib(n):
return fib(n - 1) + fib(n - 1)
"""
- self.assertEqual(HtmlToGmi("").feed(html), """\
+ self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), """\
```
def fib(n):
@@ -84,7 +88,7 @@ def fib(n):
White cold drink
"""
- self.assertEqual(HtmlToGmi("").feed(html), """\
+ self.assertEqual(HtmlToGmi("", fn_media_url).feed(html), """\
* Coffee
Black hot drink