Blame


1 2e64c739 2024-03-30 continue """Yet another http-to-gemini."""
2 2e64c739 2024-03-30 continue import socket
3 2e64c739 2024-03-30 continue import ssl
4 2e64c739 2024-03-30 continue import xml.etree.ElementTree as ET
5 2e64c739 2024-03-30 continue from argparse import ArgumentParser
6 2e64c739 2024-03-30 continue from email.message import Message
7 2e64c739 2024-03-30 continue from http import HTTPStatus
8 2e64c739 2024-03-30 continue from http.server import HTTPServer, BaseHTTPRequestHandler
9 2e64c739 2024-03-30 continue from urllib.parse import parse_qs, urlparse, urljoin, urlencode, uses_relative, uses_netloc
10 2e64c739 2024-03-30 continue from contextlib import contextmanager
11 2e64c739 2024-03-30 continue
# Register the "gemini" scheme with urllib so that urljoin() resolves relative
# references against gemini:// base URLs.  The registries are process-wide
# lists, so only append when the scheme is missing; otherwise re-importing or
# reloading this module would pile up duplicate entries.
if "gemini" not in uses_relative:
    uses_relative.append("gemini")
if "gemini" not in uses_netloc:
    uses_netloc.append("gemini")
15 2e64c739 2024-03-30 continue
16 2e64c739 2024-03-30 continue
def _build_navigation(url=None):
    """Serialize the navigation form (URL text field plus submit button).

    :param url: when truthy, pre-fills the text field with this value.
    :return: the serialized ``<form>`` element as bytes, terminated by CRLF.
    """
    # Attribute order is kept as-is because it determines the serialized output.
    url_field = {
        "title": "url",
        "type": "text",
        "name": "url",
        "placeholder": "gemini://",
        "autocomplete": "off",
        "size": "64",
    }
    if url:
        url_field["value"] = url

    form = ET.Element("form", {"method": "get"})
    ET.SubElement(form, "input", url_field)
    ET.SubElement(form, "input", {"type": "submit", "value": "go!"})
    return ET.tostring(form) + b"\r\n"
36 2e64c739 2024-03-30 continue
37 2e64c739 2024-03-30 continue
class _HTTPServer(HTTPServer):
    """HTTP server that loads the static assets into memory at construction.

    The file contents are exposed as the attributes ``header_file_bytes``,
    ``footer_file_bytes``, ``icon_file_bytes``, ``css_file_bytes`` and
    ``robots_file_bytes``.
    """

    def __init__(
        self,
        *args,
        header_file_path=None,
        footer_file_path=None,
        icon_file_path=None,
        css_file_path=None,
        robots_file_path=None,
        **kwargs
    ):
        """Read all asset files, then initialise the underlying HTTPServer.

        :param args: positional arguments forwarded to ``HTTPServer``.
        :param header_file_path: path of the HTML header template.
        :param footer_file_path: path of the HTML footer.
        :param icon_file_path: path of the favicon.
        :param css_file_path: path of the stylesheet.
        :param robots_file_path: path of the robots.txt file.
        :param kwargs: keyword arguments forwarded to ``HTTPServer``.
        :raises TypeError: if one of the file paths is left as ``None``.
        :raises OSError: if a file cannot be read or the server cannot start.
        """
        # Read the files BEFORE the base class creates and binds the listening
        # socket: if a file is missing we fail without leaving a bound socket
        # behind that nobody would close.
        self.header_file_bytes = self._read_bytes(header_file_path, "header_file_path")
        self.footer_file_bytes = self._read_bytes(footer_file_path, "footer_file_path")
        self.icon_file_bytes = self._read_bytes(icon_file_path, "icon_file_path")
        self.css_file_bytes = self._read_bytes(css_file_path, "css_file_path")
        self.robots_file_bytes = self._read_bytes(robots_file_path, "robots_file_path")
        super().__init__(*args, **kwargs)

    @staticmethod
    def _read_bytes(path, argument_name):
        """Return the content of *path* as bytes; *argument_name* is used in errors."""
        if path is None:
            # open(None) would raise TypeError as well, just with a message
            # that does not say which argument is missing.
            raise TypeError(f"{argument_name} is required")
        with open(path, "rb") as f:
            return f.read()
60 2e64c739 2024-03-30 continue
61 2e64c739 2024-03-30 continue
class _Elem:
    """Holder for one pending XML element that is serialized to *file* on flush.

    ``elem`` is ``None`` while nothing is pending; callers assign an
    ``Element`` to it and later call :meth:`flush` or :meth:`flush_bytes`.
    Calling the instance returns a context manager that flushes on normal exit.
    """

    def __init__(self, file):
        """:param file: binary writable object that receives the markup."""
        self.elem = None
        self.file = file

    @contextmanager
    def __call__(self):
        """Yield this holder and flush whatever is pending afterwards.

        The flush is not performed when the ``with`` body raises.
        """
        yield self
        self.flush()

    def flush_bytes(self):
        """Return the pending element serialized (CRLF-terminated) and clear it.

        Returns ``b""`` when nothing is pending.
        """
        if self.elem is None:
            return b""

        # short_empty_elements=False: an element without content would
        # otherwise be written as e.g. ``<pre />``.  HTML parsers ignore the
        # slash on non-void elements, so the tag would stay open and swallow
        # the rest of the page.
        rv = ET.tostring(self.elem, short_empty_elements=False) + b"\r\n"
        self.elem = None
        return rv

    def flush(self):
        """Write the pending element, if any, to the file."""
        data = self.flush_bytes()
        if data:  # nothing pending: skip the pointless zero-length write
            self.file.write(data)
82 2e64c739 2024-03-30 continue
83 2e64c739 2024-03-30 continue
class _FlushBeforeWrite:
    """One-shot guard that flushes an element holder before its file is used.

    The guard is armed on construction.  ``commit()`` flushes the holder once
    and disarms the guard; ``cancel()`` disarms it without flushing.  Calling
    the instance returns a context manager that commits on normal exit.
    """

    def __init__(self, elem):
        """:param elem: holder object providing ``file`` and ``flush()``."""
        self._file = elem.file
        self._elem = elem

    def cancel(self):
        """Disarm the guard so that later commits do not flush."""
        self._elem = None

    def commit(self):
        """Flush the holder if the guard is still armed; return the file."""
        if self._elem is None:
            return self._file
        self._elem.flush()
        self._elem = None
        return self._file

    @contextmanager
    def __call__(self):
        """Yield this guard and commit once the ``with`` body has finished."""
        yield self
        self.commit()
102 2e64c739 2024-03-30 continue
103 2e64c739 2024-03-30 continue
class _RequestHandler(BaseHTTPRequestHandler):
    """Serve the front page, the static assets and proxied Gemini documents.

    ``self.server`` must provide ``header_file_bytes``, ``footer_file_bytes``,
    ``icon_file_bytes``, ``css_file_bytes`` and ``robots_file_bytes``.
    """

    # Requests whose User-Agent starts with one of these are refused (403).
    _BLOCKED_AGENT_PREFIXES = ("facebook", "meta")
    # Paths that show the front page or, with ?url=..., a proxied document.
    _INDEX_PATHS = frozenset({"/index.html", "/index.htm", "/index", "/"})
    # path -> (Content-type, name of the server attribute holding the bytes)
    _STATIC_FILES = {
        "/favicon.ico": ("image/x-icon", "icon_file_bytes"),
        "/style.css": ("text/css", "css_file_bytes"),
        "/robots.txt": ("text/plain", "robots_file_bytes"),
    }
    _MAX_REQUESTS = 6  # first request + 5 consecutive redirects
    _DEFAULT_GEMINI_PORT = 1965
    # Seconds before a stalled upstream connection is abandoned, so that a
    # slow capsule cannot block this handler forever.
    _UPSTREAM_TIMEOUT = 30
    # Link targets with these schemes come from untrusted documents and would
    # execute in the proxy's origin; they are rendered without an href.
    _UNSAFE_LINK_SCHEMES = frozenset({"javascript", "vbscript", "data"})
    _MAX_HEADING_LEVEL = 6  # <h6> is the deepest heading HTML defines

    def _parse_path(self):
        """Return ``(path, query)`` for the request path.

        ``query`` is the ``parse_qs`` mapping, or ``{}`` without a query string.
        """
        parts = urlparse(self.path)
        return parts.path, parse_qs(parts.query) if parts.query else {}

    def do_GET(self):
        """Dispatch a GET request to the front page, the proxy or a static file."""
        user_agent = self.headers.get("User-Agent", "")
        if user_agent.startswith(self._BLOCKED_AGENT_PREFIXES):
            self.send_error(HTTPStatus.FORBIDDEN, "Crawlers are not allowed (see robots.txt)")
            return

        path, query = self._parse_path()
        if path in self._INDEX_PATHS:
            url = query.get("url", [None])[0]
            if url:
                self._proxy(url)
            else:
                self._send_front_page()
            return

        static = self._STATIC_FILES.get(path)
        if static is not None:
            content_type, attribute = static
            self._send_bytes(content_type, getattr(self.server, attribute))
            return

        self.send_error(HTTPStatus.NOT_FOUND, "File not found")

    def _send_bytes(self, content_type, payload):
        """Send a 200 response with the given Content-type and body bytes."""
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", content_type)
        self.end_headers()
        self.wfile.write(payload)

    def _page_header(self, title):
        """Return the header template with ``$URL`` replaced by *title*.

        *title* may be user-supplied, so it is HTML-escaped before insertion.
        """
        from html import escape  # local import: not among the file's imports

        return self.server.header_file_bytes.replace(b"$URL", escape(title).encode())

    def _send_front_page(self):
        """Send the page shown when no URL was requested."""
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(self._page_header("yet another http-to-gemini"))
        self.wfile.write(_build_navigation())
        self.wfile.write(self.server.footer_file_bytes)

    def _proxy(self, url):
        """Fetch *url* over Gemini, following redirects, and send the result.

        ``text/gemini`` documents are converted to HTML; any other media type
        is passed through unchanged.  Every failure is reported to the client
        as an HTTP error response.
        """
        try:
            # Certificates are deliberately not verified (as before); only the
            # deprecated protocol-less SSLContext() constructor is avoided.
            context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE

            for _ in range(self._MAX_REQUESTS):
                parsed = urlparse(url)
                if parsed.scheme != "gemini":
                    self.send_error(HTTPStatus.BAD_REQUEST, "Only gemini:// URLs are supported")
                    return
                if not parsed.hostname:
                    # Without this check create_connection() would be handed
                    # None as the host instead of failing cleanly.
                    self.send_error(HTTPStatus.BAD_REQUEST, "URL has no host")
                    return

                address = (parsed.hostname, parsed.port or self._DEFAULT_GEMINI_PORT)
                with socket.create_connection(address, timeout=self._UPSTREAM_TIMEOUT) as raw_s:
                    with context.wrap_socket(raw_s, server_hostname=parsed.hostname) as s:
                        s.sendall((url + "\r\n").encode("UTF-8"))
                        with s.makefile("rb") as fp:
                            splitted = fp.readline().decode("UTF-8").strip().split(maxsplit=1)
                            if not splitted:
                                raise RuntimeError("Empty response from Gemini server")
                            status = splitted[0]
                            meta = splitted[1] if len(splitted) == 2 else None
                            if status.startswith("3") and meta is not None:
                                # redirect
                                url = urljoin(url, meta)
                                continue
                            if not status.startswith("2"):
                                self.send_error(
                                    HTTPStatus.INTERNAL_SERVER_ERROR,
                                    f"Unsupported answer: {status}",
                                    meta,
                                )
                                return
                            mime = meta.lower() if meta is not None else "text/gemini"
                            if not mime.startswith("text/gemini"):
                                # return as-is
                                self._send_bytes(mime, fp.read())
                                return
                            m = Message()
                            m["content-type"] = mime
                            body = fp.read().decode(m.get_param("charset") or "UTF-8")
                            self._convert_gemini_to_html(url, body, mime)
                            return
            else:
                # Only reached when every attempt ended in another redirect.
                raise RuntimeError("Too many redirects")
        except Exception as error:
            # Top-level boundary of the proxy: report instead of crashing.
            self.send_error(HTTPStatus.INTERNAL_SERVER_ERROR, str(error))
            return

    @classmethod
    def _split_heading(cls, line):
        """Return ``(level, text)`` for a heading line, else ``None``.

        The level is the number of leading ``#`` characters (capped at the
        deepest HTML heading), not the length of the first word, so
        ``"#foo bar"`` is a level-1 heading with the text ``"foo bar"``.
        """
        rest = line.lstrip("#")
        level = len(line) - len(rest)
        text = rest.strip()
        if not level or not text:
            return None
        return min(level, cls._MAX_HEADING_LEVEL), text

    @classmethod
    def _build_link(cls, base_url, line):
        """Return the ``<p>`` element for a ``=>`` link *line*.

        Gemini targets are routed back through this proxy; other targets open
        in a new tab.  Targets with a script-capable scheme get no ``href``.
        """
        splitted = line[2:].strip().split(maxsplit=1)
        target = urljoin(base_url, splitted[0])
        p = ET.Element("p")
        p.text = "=> "
        a = ET.SubElement(p, "a")
        scheme = urlparse(target).scheme
        if scheme == "gemini":
            a.attrib.update(href="/?" + urlencode({"url": target}))
        elif scheme not in cls._UNSAFE_LINK_SCHEMES:
            # noopener/noreferrer: the opened page must not get a handle on
            # (or the address of) the proxy page.
            a.attrib.update(target="_blank", rel="noopener noreferrer", href=target)
        a.text = splitted[1] if len(splitted) > 1 else target
        return p

    def _convert_gemini_to_html(self, url, body, mime):
        """Send *body* (text/gemini) to the client converted to HTML.

        :param url: URL of the document; shown in the page and used as the
            base for relative links.
        :param body: decoded document text.
        :param mime: media type reported upstream; ``gemini`` is replaced by
            ``html`` to form the response Content-type.
        """
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", mime.replace("gemini", "html"))
        self.end_headers()
        self.wfile.write(self._page_header(url))
        self.wfile.write(_build_navigation(url))
        # Tracks whether the open <pre> already holds a line.  Testing the
        # accumulated text instead would drop leading blank lines.
        pre_has_lines = False
        with _Elem(self.wfile)() as pre:
            with _Elem(self.wfile)() as ul:
                for line in body.splitlines():
                    heading = self._split_heading(line)
                    # Unless cancelled (list item), a pending <ul> is written
                    # out before or right after this line is handled.
                    with _FlushBeforeWrite(ul)() as flush_before:
                        if line.startswith("```"):
                            if pre.elem is None:
                                pre.elem = ET.Element("pre")
                                pre.elem.text = ""
                                pre_has_lines = False
                            else:
                                flush_before.commit().write(pre.flush_bytes())
                        elif pre.elem is not None:
                            if pre_has_lines:
                                pre.elem.text += "\r\n"
                            pre.elem.text += line
                            pre_has_lines = True
                        elif line.startswith("=>") and line[2:].strip():
                            p = self._build_link(url, line)
                            flush_before.commit().write(ET.tostring(p) + b"\r\n")
                        elif heading is not None:
                            level, text = heading
                            h = ET.Element(f"h{level}")
                            h.text = text
                            flush_before.commit().write(ET.tostring(h) + b"\r\n")
                        elif line.startswith("* ") and line[2:].strip():
                            if ul.elem is None:
                                ul.elem = ET.Element("ul")
                            ET.SubElement(ul.elem, "li").text = line[2:].strip()
                            # Keep the list open: the next line may add an item.
                            flush_before.cancel()
                        elif line.startswith("> ") and line[2:].strip():
                            blockquote = ET.Element("blockquote")
                            ET.SubElement(blockquote, "p").text = line[2:].strip()
                            flush_before.commit().write(ET.tostring(blockquote) + b"\r\n")
                        elif line:
                            p = ET.Element("p")
                            p.text = line
                            flush_before.commit().write(ET.tostring(p) + b"\r\n")
        self.wfile.write(self.server.footer_file_bytes)
253 2e64c739 2024-03-30 continue
254 2e64c739 2024-03-30 continue
def _main():
    """Parse the command line and run the HTTP server until interrupted."""
    cli = ArgumentParser()
    cli.add_argument("--address", default="127.0.0.1", help="bind to this address (default: %(default)s)")
    cli.add_argument("--port", default=8000, type=int, help="bind to this port (default: %(default)s)")
    # The five asset options only differ in their flag and the file they name.
    required_files = (
        ("--header", "index.html.header"),
        ("--footer", "index.html.footer"),
        ("--icon", "favicon.ico"),
        ("--css", "style.css"),
        ("--robots", "robots.txt"),
    )
    for flag, file_name in required_files:
        cli.add_argument(flag, required=True, help=f"path to `{file_name}`")
    options = cli.parse_args()

    server_options = {
        "header_file_path": options.header,
        "footer_file_path": options.footer,
        "icon_file_path": options.icon,
        "css_file_path": options.css,
        "robots_file_path": options.robots,
    }
    with _HTTPServer((options.address, options.port), _RequestHandler, **server_options) as httpd:
        bound_host, bound_port = httpd.socket.getsockname()[:2]
        print(f"HTTP server started ({bound_host}:{bound_port})...")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")


if __name__ == '__main__':
    _main()