commit - 6253df7fd690eb69235c862b14329b396a4b9eae
commit + a58fea8282cec6d0d2c2301d91ae5dd72c46a8f1
blob - dea07540fce25f229f5ad5957e99bf8d14295dc5
blob + bb0516d44da55d09e45a46b4c3c12e9694433c14
--- katasterismos.py
+++ katasterismos.py
+from dataclasses import dataclass, field
+from datetime import date
from email.message import Message
from pathlib import Path
from socket import create_connection, gaierror
uses_netloc.append("gemini")
@dataclass(kw_only=True)
class Feed:
    """A subscribed gemfeed: its location plus its first level-1 heading as title."""

    # Absolute URL the feed document is fetched from.
    url: str
    # First `#` heading found while parsing the feed body; stays None when the
    # feed has no level-1 heading.  Deliberately mutable (not frozen):
    # parse_feed fills it in after construction.
    title: str | None = None
@dataclass(kw_only=True, frozen=True)
class FeedEntry:
    """One dated `=>` link line from a gemfeed; immutable value object."""

    # Absolute URL of the entry (already urljoin'ed against the feed URL).
    url: str
    # Date parsed from the leading ISO "YYYY-MM-DD" of the link label.
    updated: date
    # Remainder of the link label after the date, leading " -" stripped.
    title: str
    # Back-reference to the Feed this entry was parsed from.
    feed: Feed
class FeedError(RuntimeError):
    """Raised when a feed cannot be fetched or parsed.

    Carries ``(url, message)`` as the exception args; callers construct it
    with keywords, e.g. ``FeedError(url=url, message=str(error))``.
    NOTE(review): the condensed diff may omit further ``__init__`` lines
    (e.g. attribute assignments) — confirm against the full file.
    """

    def __init__(self, url, message):
        super().__init__(url, message)
raise FeedError(url=url, message=str(error))
def parse_feed(url, feed_body, feeds):
    """Parse a gemfeed document and add its dated entries to *feeds*.

    The first level-1 ``#`` heading (if any) becomes the feed's title.  Each
    ``=>`` link line whose label starts with an ISO ``YYYY-MM-DD`` date yields
    a FeedEntry keyed in *feeds* by its absolute URL; link lines without a
    parseable date are silently skipped, per the gemfeed convention.

    :param url: absolute URL the feed was fetched from (base for relative links)
    :param feed_body: gemtext source of the feed document
    :param feeds: dict mapping absolute entry URL -> FeedEntry, mutated in place
    """
    date_len = len("YYYY-MM-DD")
    feed = Feed(url=url)
    for raw_line in feed_body.splitlines():
        # Only the first single-# heading sets the title ("##..." is deeper).
        if raw_line.startswith("#") and not raw_line.startswith("##") and not feed.title:
            feed.title = raw_line[1:].strip()
            continue
        if not raw_line.startswith("=>"):
            continue
        parts = raw_line[2:].strip().split(maxsplit=1)
        if len(parts) != 2:
            # Bare link with no label: cannot carry a date, not an entry.
            continue
        link, label = parts
        try:
            updated = date.fromisoformat(label[:date_len])
        except ValueError:
            # Label does not start with a date — an ordinary link, skip it.
            continue
        absolute_url = urljoin(url, link)
        feeds[absolute_url] = FeedEntry(
            url=absolute_url,
            updated=updated,
            title=label[date_len:].lstrip(" -"),
            feed=feed,
        )
-
def daily(feeds_gmi):
    """Fetch and parse every feed subscribed to in the *feeds_gmi* gemtext file.

    Only ``=>`` link lines that appear after the first level-2 (``##``)
    heading are treated as subscriptions.  A feed that fails to fetch or
    parse is recorded in *errors* instead of aborting the run.

    :param feeds_gmi: Path to the local gemtext file listing the feed URLs
    :raises NotImplementedError: always — placeholder carrying the collected
        entries and errors until output rendering is implemented
    """
    feeds = {}
    errors = []
    past_level2_heading = False
    for line in feeds_gmi.read_text(encoding="utf8").splitlines():
        if line.startswith("##"):
            past_level2_heading = True
        elif past_level2_heading and line.startswith("=>"):
            # Keep the try body minimal: only the fetch/parse can raise FeedError.
            url = line[2:].strip()
            try:
                parse_feed(url, get(url), feeds)
            except FeedError as error:
                errors.append(error)
    # Bug fix: this raise belongs AFTER the loop (as in the pre-refactor code).
    # Placed inside the try it escaped uncaught — `except` only handles
    # FeedError — aborting the whole run after the first feed that fetched
    # successfully, with totals that were meaningless mid-loop.
    raise NotImplementedError(len(feeds), feeds, len(errors), errors)
-
if __name__ == '__main__':
from argparse import ArgumentParser
parser = ArgumentParser()