Commit Diff


commit - 6253df7fd690eb69235c862b14329b396a4b9eae
commit + a58fea8282cec6d0d2c2301d91ae5dd72c46a8f1
blob - dea07540fce25f229f5ad5957e99bf8d14295dc5
blob + bb0516d44da55d09e45a46b4c3c12e9694433c14
--- katasterismos.py
+++ katasterismos.py
@@ -1,3 +1,5 @@
+from dataclasses import dataclass, field
+from datetime import date
 from email.message import Message
 from pathlib import Path
 from socket import create_connection, gaierror
@@ -9,6 +11,20 @@ uses_relative.append("gemini")
 uses_netloc.append("gemini")
 
 
+@dataclass(kw_only=True)
+class Feed:
+    url: str
+    title: str | None = None
+
+
+@dataclass(kw_only=True, frozen=True)
+class FeedEntry:
+    url: str
+    updated: date
+    title: str
+    feed: Feed
+
+
 class FeedError(RuntimeError):
     def __init__(self, url, message):
         super().__init__(url, message)
@@ -52,35 +68,45 @@ def get(url):
             raise FeedError(url=url, message=str(error))
 
 
-def parse(url, feed_body):
-    header = None
+def parse_feed(url, feed_body, feeds):
+    _feed = Feed(url=url)
     for line in feed_body.splitlines():
-        splitted = line.rstrip().split(maxsplit=1)
-        if len(splitted) == 2 and splitted[0] == "#" and not header:
-            header = splitted[1]
-    print(f"{header} ({url})" if header else url)
+        if line.startswith("#") and line[1:2] != "#" and not _feed.title:
+            _feed.title = line[1:].strip()
+        elif line.startswith("=>"):
+            splitted = line[2:].strip().split(maxsplit=1)
+            if len(splitted) == 2:
+                entry_url, updated_title = splitted
+                idx = len("YYYY-MM-DD")
+                try:
+                    updated = date.fromisoformat(updated_title[:idx])
+                except ValueError:
+                    continue
+                entry_url = urljoin(url, entry_url)
+                feeds[entry_url] = FeedEntry(
+                    url=entry_url,
+                    updated=updated,
+                    title=updated_title[idx:].lstrip(" -"),
+                    feed=_feed
+                )
 
-
 def daily(feeds_gmi):
     feeds = {}
     errors = []
 
     header_2_passes = False
     for line in feeds_gmi.read_text(encoding="utf8").splitlines():
-        splitted = line.rstrip().split()
-        if splitted and splitted[0] == "##":
+        if line.startswith("##"):
             header_2_passes = True
-        elif header_2_passes and len(splitted) > 1 and splitted[0] == "=>":
+        elif header_2_passes and line.startswith("=>"):
             try:
-                url = splitted[1]
-                parse(url, get(url))
-                feeds[url] = []
+                url = line[2:].strip()
+                parse_feed(url, get(url), feeds)
+                raise NotImplementedError(len(feeds), feeds, len(errors), errors)
             except FeedError as error:
                 errors.append(error)
 
-    raise NotImplementedError(len(feeds), feeds, len(errors), errors)
 
-
 if __name__ == '__main__':
     from argparse import ArgumentParser
     parser = ArgumentParser()