1 2e64c739 2024-03-30 continue """Yet another http-to-gemini."""
2 2e64c739 2024-03-30 continue import socket
3 2e64c739 2024-03-30 continue import ssl
4 2e64c739 2024-03-30 continue import xml.etree.ElementTree as ET
5 2e64c739 2024-03-30 continue from argparse import ArgumentParser
6 2e64c739 2024-03-30 continue from email.message import Message
7 2e64c739 2024-03-30 continue from http import HTTPStatus
8 c1f94e6b 2024-10-18 continue from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler
9 2e64c739 2024-03-30 continue from urllib.parse import parse_qs, urlparse, urljoin, urlencode, uses_relative, uses_netloc
10 2e64c739 2024-03-30 continue from contextlib import contextmanager
# urljoin() only resolves relative references for schemes listed in these
# urllib.parse registries; without "gemini" in both, relative links and
# redirect targets would be returned unresolved.  The membership checks keep
# the registration idempotent (module reload, or a Python that already knows
# the scheme).
if "gemini" not in uses_relative:
    uses_relative.append("gemini")
if "gemini" not in uses_netloc:
    uses_netloc.append("gemini")
17 2e64c739 2024-03-30 continue def _build_navigation(url=None):
18 2e64c739 2024-03-30 continue form = ET.Element("form")
19 2e64c739 2024-03-30 continue form.attrib.update(method="get")
20 2e64c739 2024-03-30 continue input_ = ET.SubElement(form, "input")
21 2e64c739 2024-03-30 continue input_.attrib.update(
23 2e64c739 2024-03-30 continue "title": "url",
24 2e64c739 2024-03-30 continue "type": "text",
25 2e64c739 2024-03-30 continue "name": "url",
26 2e64c739 2024-03-30 continue "placeholder": "gemini://",
27 2e64c739 2024-03-30 continue "autocomplete": "off",
28 2e64c739 2024-03-30 continue "size": "64",
32 2e64c739 2024-03-30 continue input_.attrib.update(value=url)
33 2e64c739 2024-03-30 continue input_ = ET.SubElement(form, "input")
34 2e64c739 2024-03-30 continue input_.attrib.update(**{"type": "submit", "value": "go!"})
35 2e64c739 2024-03-30 continue return ET.tostring(form) + b"\r\n"
38 c1f94e6b 2024-10-18 continue class _HTTPServer(ThreadingHTTPServer):
39 2e64c739 2024-03-30 continue def __init__(
42 2e64c739 2024-03-30 continue header_file_path=None,
43 2e64c739 2024-03-30 continue footer_file_path=None,
44 2e64c739 2024-03-30 continue icon_file_path=None,
45 2e64c739 2024-03-30 continue css_file_path=None,
46 bfd26159 2024-07-31 continue robots_file_path=None,
47 2e64c739 2024-03-30 continue **kwargs
49 2e64c739 2024-03-30 continue super().__init__(*args, **kwargs)
50 2e64c739 2024-03-30 continue with open(header_file_path, "rb") as f:
51 2e64c739 2024-03-30 continue self.header_file_bytes = f.read()
52 2e64c739 2024-03-30 continue with open(footer_file_path, "rb") as f:
53 2e64c739 2024-03-30 continue self.footer_file_bytes = f.read()
54 2e64c739 2024-03-30 continue with open(icon_file_path, "rb") as f:
55 2e64c739 2024-03-30 continue self.icon_file_bytes = f.read()
56 2e64c739 2024-03-30 continue with open(css_file_path, "rb") as f:
57 2e64c739 2024-03-30 continue self.css_file_bytes = f.read()
58 bfd26159 2024-07-31 continue with open(robots_file_path, "rb") as f:
59 bfd26159 2024-07-31 continue self.robots_file_bytes = f.read()
62 2e64c739 2024-03-30 continue class _Elem:
63 2e64c739 2024-03-30 continue def __init__(self, file):
64 2e64c739 2024-03-30 continue self.elem = None
65 8ad6b2ac 2024-04-12 continue self.file = file
67 2e64c739 2024-03-30 continue @contextmanager
68 2e64c739 2024-03-30 continue def __call__(self):
69 2e64c739 2024-03-30 continue yield self
70 2e64c739 2024-03-30 continue self.flush()
72 8ad6b2ac 2024-04-12 continue def flush_bytes(self):
73 8ad6b2ac 2024-04-12 continue if self.elem is None:
74 8ad6b2ac 2024-04-12 continue return b""
76 8ad6b2ac 2024-04-12 continue rv = ET.tostring(self.elem) + b"\r\n"
77 8ad6b2ac 2024-04-12 continue self.elem = None
78 8ad6b2ac 2024-04-12 continue return rv
80 2e64c739 2024-03-30 continue def flush(self):
81 8ad6b2ac 2024-04-12 continue self.file.write(self.flush_bytes())
84 8ad6b2ac 2024-04-12 continue class _FlushBeforeWrite:
85 2e64c739 2024-03-30 continue def __init__(self, elem):
86 2e64c739 2024-03-30 continue self._elem = elem
87 8ad6b2ac 2024-04-12 continue self._file = elem.file
89 2e64c739 2024-03-30 continue def cancel(self):
90 2e64c739 2024-03-30 continue self._elem = None
92 8ad6b2ac 2024-04-12 continue def commit(self):
93 8ad6b2ac 2024-04-12 continue if self._elem is not None:
94 8ad6b2ac 2024-04-12 continue self._elem.flush()
95 8ad6b2ac 2024-04-12 continue self._elem = None
96 8ad6b2ac 2024-04-12 continue return self._file
98 2e64c739 2024-03-30 continue @contextmanager
99 2e64c739 2024-03-30 continue def __call__(self):
100 2e64c739 2024-03-30 continue yield self
101 8ad6b2ac 2024-04-12 continue self.commit()
104 2e64c739 2024-03-30 continue class _RequestHandler(BaseHTTPRequestHandler):
105 2e64c739 2024-03-30 continue def _parse_path(self):
106 2e64c739 2024-03-30 continue _, _, path, _, query, _ = urlparse(self.path)
107 2e64c739 2024-03-30 continue return path, parse_qs(query) if query else {}
109 2e64c739 2024-03-30 continue def do_GET(self):
110 34e08cc3 2024-08-01 continue user_agent = self.headers.get("User-Agent", "")
111 34e08cc3 2024-08-01 continue for prefix in ("facebook", "meta", ):
112 34e08cc3 2024-08-01 continue if user_agent.startswith(prefix):
113 34e08cc3 2024-08-01 continue self.send_error(HTTPStatus.FORBIDDEN, "Crawlers are not allowed (see robots.txt)")
116 2e64c739 2024-03-30 continue path, query = self._parse_path()
117 2e64c739 2024-03-30 continue if path in {"/index.html", "/index.htm", "/index", "/"}:
118 2e64c739 2024-03-30 continue url = query.get("url", [None])[0]
119 2e64c739 2024-03-30 continue if not url:
120 2e64c739 2024-03-30 continue self.send_response(HTTPStatus.OK)
121 2e64c739 2024-03-30 continue self.send_header("Content-type", "text/html")
122 2e64c739 2024-03-30 continue self.end_headers()
123 2e64c739 2024-03-30 continue self.wfile.write(
124 2e64c739 2024-03-30 continue self.server.header_file_bytes.replace(b"$URL", b"yet another http-to-gemini")
126 2e64c739 2024-03-30 continue self.wfile.write(_build_navigation())
127 2e64c739 2024-03-30 continue self.wfile.write(self.server.footer_file_bytes)
131 4e5d1555 2024-04-11 continue for _ in range(6): # first request + 5 consecutive redirects
132 2e64c739 2024-03-30 continue parsed = urlparse(url)
133 2e64c739 2024-03-30 continue if parsed.scheme != "gemini":
134 2e64c739 2024-03-30 continue self.send_error(HTTPStatus.BAD_REQUEST, "Only gemini:// URLs are supported")
137 c1f94e6b 2024-10-18 continue context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
138 2e64c739 2024-03-30 continue context.check_hostname = False
139 2e64c739 2024-03-30 continue context.verify_mode = ssl.CERT_NONE
140 2e64c739 2024-03-30 continue with socket.create_connection((parsed.hostname, parsed.port or 1965)) as raw_s:
141 401c2cd4 2024-04-01 continue with context.wrap_socket(raw_s, server_hostname=parsed.hostname) as s:
142 2e64c739 2024-03-30 continue s.sendall((url + '\r\n').encode("UTF-8"))
143 2e64c739 2024-03-30 continue fp = s.makefile("rb")
144 2e64c739 2024-03-30 continue splitted = fp.readline().decode("UTF-8").strip().split(maxsplit=1)
145 2e64c739 2024-03-30 continue status = splitted[0]
146 2e64c739 2024-03-30 continue if status.startswith("3") and len(splitted) == 2:
147 2e64c739 2024-03-30 continue # redirect
148 2e64c739 2024-03-30 continue url = urljoin(url, splitted[1])
149 2e64c739 2024-03-30 continue continue
150 2e64c739 2024-03-30 continue if not status.startswith("2"):
151 f07be084 2024-04-22 continue self.send_error(
152 f07be084 2024-04-22 continue HTTPStatus.INTERNAL_SERVER_ERROR,
153 f07be084 2024-04-22 continue f"Unsupported answer: {splitted[0]}",
154 f07be084 2024-04-22 continue splitted[1] if len(splitted) == 2 else None
157 2e64c739 2024-03-30 continue mime = splitted[1].lower() if len(splitted) == 2 else "text/gemini"
158 2e64c739 2024-03-30 continue if not mime.startswith("text/gemini"):
159 2e64c739 2024-03-30 continue # return as-is
160 2e64c739 2024-03-30 continue self.send_response(HTTPStatus.OK)
161 2e64c739 2024-03-30 continue self.send_header("Content-type", mime)
162 2e64c739 2024-03-30 continue self.end_headers()
163 d8322e09 2024-09-15 continue while True:
164 d8322e09 2024-09-15 continue buffer = fp.read(64 * 1024)
165 d8322e09 2024-09-15 continue if not buffer:
166 d8322e09 2024-09-15 continue break # EOF
167 d8322e09 2024-09-15 continue self.wfile.write(buffer)
169 2e64c739 2024-03-30 continue m = Message()
170 2e64c739 2024-03-30 continue m['content-type'] = mime
171 2e64c739 2024-03-30 continue body = fp.read().decode(m.get_param('charset') or "UTF-8")
172 2e64c739 2024-03-30 continue self._convert_gemini_to_html(url, body, mime)
176 2e64c739 2024-03-30 continue raise RuntimeError("Too many redirects")
177 2e64c739 2024-03-30 continue except Exception as error:
178 2e64c739 2024-03-30 continue self.send_error(HTTPStatus.INTERNAL_SERVER_ERROR, str(error))
181 2e64c739 2024-03-30 continue if path == "/favicon.ico":
182 2e64c739 2024-03-30 continue self.send_response(HTTPStatus.OK)
183 2e64c739 2024-03-30 continue self.send_header("Content-type", "image/x-icon")
184 2e64c739 2024-03-30 continue self.end_headers()
185 2e64c739 2024-03-30 continue self.wfile.write(self.server.icon_file_bytes)
188 2e64c739 2024-03-30 continue if path == "/style.css":
189 2e64c739 2024-03-30 continue self.send_response(HTTPStatus.OK)
190 2e64c739 2024-03-30 continue self.send_header("Content-type", "text/css")
191 2e64c739 2024-03-30 continue self.end_headers()
192 2e64c739 2024-03-30 continue self.wfile.write(self.server.css_file_bytes)
195 91877508 2024-07-31 continue if path == "/robots.txt":
196 8eec3b60 2024-07-31 continue self.send_response(HTTPStatus.OK)
197 8eec3b60 2024-07-31 continue self.send_header("Content-type", "text/plain")
198 8eec3b60 2024-07-31 continue self.end_headers()
199 bfd26159 2024-07-31 continue self.wfile.write(self.server.robots_file_bytes)
202 2e64c739 2024-03-30 continue self.send_error(HTTPStatus.NOT_FOUND, "File not found")
204 2e64c739 2024-03-30 continue def _convert_gemini_to_html(self, url, body, mime):
205 2e64c739 2024-03-30 continue # convert gemini (body) to html
206 2e64c739 2024-03-30 continue self.send_response(HTTPStatus.OK)
207 2e64c739 2024-03-30 continue self.send_header("Content-type", mime.replace("gemini", "html"))
208 2e64c739 2024-03-30 continue self.end_headers()
209 2e64c739 2024-03-30 continue self.wfile.write(self.server.header_file_bytes.replace(b"$URL", url.encode()))
210 2e64c739 2024-03-30 continue self.wfile.write(_build_navigation(url))
211 2e64c739 2024-03-30 continue with _Elem(self.wfile)() as pre:
212 2e64c739 2024-03-30 continue with _Elem(self.wfile)() as ul:
213 2e64c739 2024-03-30 continue for line in body.splitlines():
214 8ad6b2ac 2024-04-12 continue with _FlushBeforeWrite(ul)() as flush_before:
215 2e64c739 2024-03-30 continue if line.startswith("```"):
216 2e64c739 2024-03-30 continue if pre.elem is None:
217 2e64c739 2024-03-30 continue pre.elem = ET.Element("pre")
218 2e64c739 2024-03-30 continue pre.elem.text = ""
220 8ad6b2ac 2024-04-12 continue flush_before.commit().write(pre.flush_bytes())
221 2e64c739 2024-03-30 continue elif pre.elem is not None:
222 2e64c739 2024-03-30 continue if pre.elem.text:
223 2e64c739 2024-03-30 continue pre.elem.text += "\r\n"
224 2e64c739 2024-03-30 continue pre.elem.text += line
225 2e64c739 2024-03-30 continue elif line.startswith("=>") and line[2:].strip():
226 2e64c739 2024-03-30 continue p = ET.Element("p")
227 2e64c739 2024-03-30 continue p.text = "=> "
228 2e64c739 2024-03-30 continue splitted = line[2:].strip().split(maxsplit=1)
229 2e64c739 2024-03-30 continue target = urljoin(url, splitted[0])
230 2e64c739 2024-03-30 continue a = ET.SubElement(p, "a")
231 2e64c739 2024-03-30 continue if urlparse(target).scheme == "gemini":
232 2e64c739 2024-03-30 continue a.attrib.update(href="/?" + urlencode({"url": target}))
234 2e64c739 2024-03-30 continue a.attrib.update(target="_blank", href=target)
235 2e64c739 2024-03-30 continue a.text = splitted[1] if len(splitted) > 1 else target
236 8ad6b2ac 2024-04-12 continue flush_before.commit().write(ET.tostring(p) + b"\r\n")
237 833c9850 2024-12-12 continue elif line.startswith("###"):
238 833c9850 2024-12-12 continue h = ET.Element("h3")
239 833c9850 2024-12-12 continue h.text = line[3:]
240 8ad6b2ac 2024-04-12 continue flush_before.commit().write(ET.tostring(h) + b"\r\n")
241 833c9850 2024-12-12 continue elif line.startswith("##"):
242 833c9850 2024-12-12 continue h = ET.Element("h2")
243 833c9850 2024-12-12 continue h.text = line[2:]
244 833c9850 2024-12-12 continue flush_before.commit().write(ET.tostring(h) + b"\r\n")
245 833c9850 2024-12-12 continue elif line.startswith("#"):
246 833c9850 2024-12-12 continue h = ET.Element("h1")
247 833c9850 2024-12-12 continue h.text = line[1:]
248 833c9850 2024-12-12 continue flush_before.commit().write(ET.tostring(h) + b"\r\n")
249 2e64c739 2024-03-30 continue elif line.startswith("* ") and line[2:].strip():
250 2e64c739 2024-03-30 continue if ul.elem is None:
251 2e64c739 2024-03-30 continue ul.elem = ET.Element("ul")
252 2e64c739 2024-03-30 continue ET.SubElement(ul.elem, "li").text = line[2:].strip()
253 8ad6b2ac 2024-04-12 continue flush_before.cancel()
254 2e64c739 2024-03-30 continue elif line.startswith("> ") and line[2:].strip():
255 2e64c739 2024-03-30 continue blockquote = ET.Element("blockquote")
256 2e64c739 2024-03-30 continue ET.SubElement(blockquote, "p").text = line[2:].strip()
257 8ad6b2ac 2024-04-12 continue flush_before.commit().write(ET.tostring(blockquote) + b"\r\n")
259 2e64c739 2024-03-30 continue if line:
260 2e64c739 2024-03-30 continue p = ET.Element("p")
261 2e64c739 2024-03-30 continue p.text = line
262 8ad6b2ac 2024-04-12 continue flush_before.commit().write(ET.tostring(p) + b"\r\n")
263 2e0d560d 2024-03-30 continue self.wfile.write(self.server.footer_file_bytes)
266 2e64c739 2024-03-30 continue def _main():
267 2e64c739 2024-03-30 continue parser = ArgumentParser()
268 2e64c739 2024-03-30 continue parser.add_argument("--address", default="127.0.0.1", help="bind to this address (default: %(default)s)")
269 2e64c739 2024-03-30 continue parser.add_argument("--port", default=8000, type=int, help="bind to this port (default: %(default)s)")
270 2e64c739 2024-03-30 continue parser.add_argument("--header", required=True, help="path to `index.html.header`")
271 2e64c739 2024-03-30 continue parser.add_argument("--footer", required=True, help="path to `index.html.footer`")
272 2e64c739 2024-03-30 continue parser.add_argument("--icon", required=True , help="path to `favicon.ico`")
273 2e64c739 2024-03-30 continue parser.add_argument("--css", required=True, help="path to `style.css`")
274 bfd26159 2024-07-31 continue parser.add_argument("--robots", required=True, help="path to `robots.txt`")
275 2e64c739 2024-03-30 continue args = parser.parse_args()
277 2e64c739 2024-03-30 continue with _HTTPServer(
278 2e64c739 2024-03-30 continue (args.address, args.port),
279 2e64c739 2024-03-30 continue _RequestHandler,
280 2e64c739 2024-03-30 continue header_file_path=args.header,
281 2e64c739 2024-03-30 continue footer_file_path=args.footer,
282 2e64c739 2024-03-30 continue icon_file_path=args.icon,
283 2e64c739 2024-03-30 continue css_file_path=args.css,
284 bfd26159 2024-07-31 continue robots_file_path=args.robots,
285 2e64c739 2024-03-30 continue ) as http_server:
286 2e64c739 2024-03-30 continue sock_host, sock_port = http_server.socket.getsockname()[:2]
287 2e64c739 2024-03-30 continue print(f"HTTP server started ({sock_host}:{sock_port})...")
289 2e64c739 2024-03-30 continue http_server.serve_forever()
290 2e64c739 2024-03-30 continue except KeyboardInterrupt:
291 2e64c739 2024-03-30 continue print("\nKeyboard interrupt received, exiting.")
# Run the server only when executed as a script, not when imported.
if __name__ == "__main__":
    _main()