commit a58fea8282cec6d0d2c2301d91ae5dd72c46a8f1
from: Aleksey Ryndin
date: Sun Jan 26 19:07:54 2025 UTC

Add work-in-progress commit

commit - 6253df7fd690eb69235c862b14329b396a4b9eae
commit + a58fea8282cec6d0d2c2301d91ae5dd72c46a8f1
blob - dea07540fce25f229f5ad5957e99bf8d14295dc5
blob + bb0516d44da55d09e45a46b4c3c12e9694433c14
--- katasterismos.py
+++ katasterismos.py
@@ -1,3 +1,5 @@
+from dataclasses import dataclass, field
+from datetime import date
 from email.message import Message
 from pathlib import Path
 from socket import create_connection, gaierror
@@ -9,6 +11,20 @@
 uses_relative.append("gemini")
 uses_netloc.append("gemini")
 
+@dataclass(kw_only=True)
+class Feed:
+    url: str
+    title: str | None = None
+
+
+@dataclass(kw_only=True, frozen=True)
+class FeedEntry:
+    url: str
+    updated: date
+    title: str
+    feed: Feed
+
+
 class FeedError(RuntimeError):
     def __init__(self, url, message):
         super().__init__(url, message)
@@ -52,35 +68,45 @@ def get(url):
         raise FeedError(url=url, message=str(error))
 
 
-def parse(url, feed_body):
-    header = None
+def parse_feed(url, feed_body, feeds):
+    _feed = Feed(url=url)
     for line in feed_body.splitlines():
-        splitted = line.rstrip().split(maxsplit=1)
-        if len(splitted) == 2 and splitted[0] == "#" and not header:
-            header = splitted[1]
-    print(f"{header} ({url})" if header else url)
+        if line.startswith("#") and line[1:2] != "#" and not _feed.title:
+            _feed.title = line[1:].strip()
+        elif line.startswith("=>"):
+            splitted = line[2:].strip().split(maxsplit=1)
+            if len(splitted) == 2:
+                entry_url, updated_title = splitted
+                idx = len("YYYY-MM-DD")
+                try:
+                    updated = date.fromisoformat(updated_title[:idx])
+                except ValueError:
+                    continue
+                entry_url = urljoin(url, entry_url)
+                feeds[entry_url] = FeedEntry(
+                    url=entry_url,
+                    updated=updated,
+                    title=updated_title[idx:].lstrip(" -"),
+                    feed=_feed
+                )
-
 
 
 def daily(feeds_gmi):
     feeds = {}
     errors = []
     header_2_passes = False
     for line in feeds_gmi.read_text(encoding="utf8").splitlines():
-        splitted = line.rstrip().split()
-        if splitted and splitted[0] == "##":
+        if line.startswith("##"):
             header_2_passes = True
-        elif header_2_passes and len(splitted) > 1 and splitted[0] == "=>":
+        elif header_2_passes and line.startswith("=>"):
             try:
-                url = splitted[1]
-                parse(url, get(url))
-                feeds[url] = []
+                url = line[2:].strip()
+                parse_feed(url, get(url), feeds)
+                raise NotImplementedError(len(feeds), feeds, len(errors), errors)
             except FeedError as error:
                 errors.append(error)
 
-    raise NotImplementedError(len(feeds), feeds, len(errors), errors)
 
-
 if __name__ == '__main__':
     from argparse import ArgumentParser
     parser = ArgumentParser()