commit 3304ae77cac645c4050490bd7ce339bf7f8afc77 from: Aleksey Ryndin date: Sat Jul 06 18:00:57 2024 UTC Add VGI commit - 01105a5401d8b606c9d8327aa2ec2e48af826e55 commit + 3304ae77cac645c4050490bd7ce339bf7f8afc77 blob - 6800f1b3e5a99f6cf38f3a3471d80983148f3e73 blob + 0965f79a9250e3dfdcf7255ecaa0bff9b857c355 --- Makefile +++ Makefile @@ -8,7 +8,7 @@ clean: ${MAKE} -C tests clean run: server - ./vostok/vostok -c cert/server.crt -k cert/server.key -f ./ + ./vostok/vostok -c cert/server.crt -k cert/server.key -f ./ -g vgi -e ./vgi.sh tests: ${MAKE} -C tests blob - 416d3c3bfef21603014cd82dd6340bc2ac608e36 blob + 67bde05c01a4010c32975a6e41ad5199f300fd40 --- README.gmi +++ README.gmi @@ -6,7 +6,7 @@ ssh://anonymous@got.any-key.press/vostok ``` => https://got.any-key.press/?action=summary&path=vostok.git vostok repository web frontend -Latest version (git tag): v0.1.4 +Latest version (git tag): v0.2.0 => capsule/vostok.png What is "vostok"? ``` @@ -25,11 +25,9 @@ ______ ___ ___ _____ ___ _ __ * [v] make install * [v] mime.types * [v] redirect (31) "[.../]dir" to "[.../]dir/" (correctness of relative link) +* [v] dynamic content: VGI (CGI-like) * [ ] syslog(3) -* [ ] SNI-based routing (Server Name Indication) -* [ ] clang-tydi (lightweight standalone alternatives?) -* [ ] man pages -* [ ] Gemini redirect 3x (symbolic link on the file system/config?) +* [ ] man pages (alternatives?) * [ ] O_NONBLOCK (processing with a fixed number of threads) * [ ] pledge(2) / unveil(2) blob - 353cef1cf625b5329877f12bc183fb59a1ccf013 blob + d4e2e36973fadbdcc97e6a2063f9c64aeb638825 --- tests/test_open_file.cc +++ tests/test_open_file.cc @@ -16,7 +16,7 @@ namespace error { std::ostream dev_null{nullptr}; std::ostream &g_log = dev_null; -} // namespace +} // namespace error TEST_START(test_open_file) std::ostringstream ss; blob - 13d42a89a2991acc499aef91057a92efaf9044db blob + 0f8797281a217416a5342ab8e2ffe68d336ee0d6 --- tests/test_request.cc +++ tests/test_request.cc @@ -6,6 +6,11 @@ namespace vostok { +namespace error +{ +std::ostream &g_log = std::cout; +} // namespace error + namespace { template @@ -25,6 +30,7 @@ std::string &fill_request_buffer(Request &r, const cha } // namespace TEST_START(test_request) + Span no_prefix; Request request; std::string path; @@ -36,12 +42,12 @@ TEST_START(test_request) { auto &buffer = fill_request_buffer(request, "gemini://host"); buffer[buffer.size() - 1] = '\0'; - IS_TRUE(request.parse(path) == Request::BAD_REQUEST); + IS_TRUE(request.parse(no_prefix, path) == Request::BAD_REQUEST); } #define CASE_ERROR(url_literal, expected_result) \ fill_request_buffer(request, url_literal); \ - IS_TRUE(request.parse(path) == expected_result) + IS_TRUE(request.parse(no_prefix, path) == expected_result) CASE_ERROR("", Request::URL_TOO_SHORT); CASE_ERROR("g", Request::URL_TOO_SHORT); @@ -55,7 +61,7 @@ TEST_START(test_request) #define CASE_OK(url_literal, path_literal) \ fill_request_buffer(request, url_literal); \ - IS_TRUE(request.parse(path) == Request::URL_OK); \ + IS_TRUE(request.parse(no_prefix, path) == Request::URL_OK); \ IS_TRUE(path == path_literal) CASE_OK("gemini://host", ""); @@ -72,6 +78,13 @@ TEST_START(test_request) CASE_OK("gemini://host:1965/a/b", "a/b"); CASE_OK("gemini://host:1965/a/b/", "a/b/"); + CASE_OK("gemini://host.org:1965", ""); + CASE_OK("gemini://host.org:1965/", "/"); + CASE_OK("gemini://host.org:1965/a", "a"); + CASE_OK("gemini://host.org:1965/a/", "a/"); + CASE_OK("gemini://host.org:1965/a/b", "a/b"); + CASE_OK("gemini://host.org:1965/a/b/", "a/b/"); + CASE_OK("gemini://host/a/b/../c/./d", "a/c/d"); // RFC 3986, 3.1. Scheme @@ -79,7 +92,44 @@ TEST_START(test_request) // > should accept uppercase letters as equivalent to lowercase in scheme // > names (e.g., allow "HTTP" as well as "http") CASE_OK("GeMiNi://host", ""); + + const auto vgi_prefix = cut_null("vgi"); +#define CASE_PREFIX_MATCHED(url_literal) \ + fill_request_buffer(request, url_literal); \ + IS_TRUE(request.parse(vgi_prefix, path) == Request::URL_PATH_PREFIX_MATCHED); \ + IS_TRUE(path == (std::string{url_literal} + "\r\n")) + + CASE_PREFIX_MATCHED("gemini://host/vgi"); + CASE_PREFIX_MATCHED("gemini://host/vgi/"); + CASE_PREFIX_MATCHED("gemini://host/vgi2"); + CASE_PREFIX_MATCHED("gemini://host/vgi?"); + + CASE_PREFIX_MATCHED("gemini://host:1965/vgi"); + CASE_PREFIX_MATCHED("gemini://host:1965/vgi/"); + CASE_PREFIX_MATCHED("gemini://host:1965/vgi2"); + CASE_PREFIX_MATCHED("gemini://host:1965/vgi?"); + + CASE_PREFIX_MATCHED("gemini://host.org:1965/vgi"); + CASE_PREFIX_MATCHED("gemini://host.org:1965/vgi/"); + CASE_PREFIX_MATCHED("gemini://host.org:1965/vgi2"); + CASE_PREFIX_MATCHED("gemini://host.org:1965/vgi?"); + +#define CASE_PREFIX_NOT_MATCHED(url_literal) \ + fill_request_buffer(request, url_literal); \ + IS_TRUE(request.parse(vgi_prefix, path) == Request::URL_OK); \ + + CASE_PREFIX_NOT_MATCHED("gemini://host/Xgi"); + CASE_PREFIX_NOT_MATCHED("gemini://host/vgX"); + CASE_PREFIX_NOT_MATCHED("gemini://host/_vgi"); + CASE_PREFIX_NOT_MATCHED("gemini://host/VGi"); + + // debatable, but so far + CASE_PREFIX_NOT_MATCHED("gemini://host/./vgi"); + + // debatable, but so far + CASE_PREFIX_NOT_MATCHED("gemini://host/abc/../vgi"); + TEST_END() } // namespace vostok blob - 36642223663b3697adb5642f2be7ef895c432922 blob + ea04154c4340d36d28532d3551a993ad32703778 --- vostok/args.cc +++ vostok/args.cc @@ -29,7 +29,8 @@ bool usage(const char *program) error::g_log << "\t-k FILE : Server key file [REQUIRED]" << std::endl; error::g_log << "\t-f PATH : Path to file system data [REQUIRED]" << std::endl; error::g_log << "\t-m FILE : Path to file mime.types" << std::endl; - error::g_log << "\t-g PATH : VGI (CGI-like) path part. Must NOT contain a separator (slash: /)" << std::endl; + error::g_log << "\t-g PATH : VGI (CGI-like) path prefix (case sensitive, without normalization)" << std::endl; + error::g_log << "\t-e PATH : VGI (CGI-like) execution command" << std::endl; return false; } @@ -46,7 +47,7 @@ parse_command_line_arguments( { int ch; char *p = nullptr; - while ((ch = getopt(argc, argv, "a:p:c:k:f:m:g:")) != -1) { + while ((ch = getopt(argc, argv, "a:p:c:k:f:m:g:e:")) != -1) { switch (ch) { case 'a': args.addr = optarg; @@ -85,8 +86,11 @@ parse_command_line_arguments( return false; break; case 'g': - args.vgi = optarg; + args.vgi_prefix = Span{optarg, strlen(optarg)}; break; + case 'e': + args.vgi_command = optarg; + break; default: return usage(argv[0]); @@ -107,6 +111,12 @@ parse_command_line_arguments( error::g_log << "Invalid command line: -d option required" << std::endl; return usage(argv[0]); } + if ( (args.vgi_prefix.size() && !args.vgi_command) || + (!args.vgi_prefix.size() && args.vgi_command) ) + { + error::g_log << "Invalid command line: options -g and -e can only be specified together" << std::endl; + return usage(argv[0]); + } return true; } blob - 83e0e5c50e2ac5ce8db054983ad855cb281b9f34 blob + 2bf92b3e7031734df1739a5d94504757f3b5c877 --- vostok/args.h +++ vostok/args.h @@ -18,7 +18,8 @@ struct CommandLineArguments czstring key_file{nullptr}; UniqueFd directory; Mime mime; - czstring vgi{nullptr}; + Span vgi_prefix; + czstring vgi_command{nullptr}; }; blob - 7f087caa76e95ee647bee77a1c0d230aad3f151a blob + d25fb31f943c5036e49c94ca947d23cfc1c9b550 --- vostok/gemini.cc +++ vostok/gemini.cc @@ -14,6 +14,7 @@ const std::array SPACE{' '}; const Status STATUS_20_SUCCESS{'2', '0'}; const Status STATUS_31_REDIRECT_PERMANENT{'3', '1'}; const Status STATUS_40_TEMPORARY_FAILURE{'4', '0'}; +const Status STATUS_42_CGI_ERROR{'4', '2'}; const Status STATUS_50_PERMANENT_FAILURE{'5', '0'}; const Status STATUS_51_NOT_FOUND{'5', '1'}; const Status STATUS_53_PROXY_REQUEST_REFUSED{'5', '3'}; blob - cc773ff8a5e870b10d30c9be82bc14f75c33ec24 blob + 4ed75c6b759a9db6f15638f3911a3a6942c3c206 --- vostok/gemini.h +++ vostok/gemini.h @@ -21,6 +21,7 @@ using Status = std::array; extern const Status STATUS_20_SUCCESS; extern const Status STATUS_31_REDIRECT_PERMANENT; extern const Status STATUS_40_TEMPORARY_FAILURE; +extern const Status STATUS_42_CGI_ERROR; extern const Status STATUS_50_PERMANENT_FAILURE; extern const Status STATUS_51_NOT_FOUND; extern const Status STATUS_53_PROXY_REQUEST_REFUSED; blob - 12ac2a5cf55579b9ed761481c1a951dc0560f983 blob + b8f42bafd86a8ee7dfe9eda918ee320864350783 --- vostok/request.cc +++ vostok/request.cc @@ -16,18 +16,8 @@ namespace { const auto gemini_scheme = cut_null("gemini://"); -inline bool is_gemini_scheme(const std::string &url) -{ - return std::equal( - gemini_scheme.begin(), - gemini_scheme.end(), - url.begin(), - [](char v1, char v2){return std::tolower(v1) == std::tolower(v2);} - ); -} - -bool cut_crlf(std::string &buffer) +Span cut_crlf(const std::string &buffer) { // > servers MUST ignore anything sent after the first occurrence of a . for (auto current = buffer.cbegin(); current != buffer.cend(); ++current) @@ -37,12 +27,9 @@ bool cut_crlf(std::string &buffer) break; if (*current == gemini::CRLF[0] && *next == gemini::CRLF[1]) - { - buffer.resize(current - buffer.cbegin()); - return true; - } + return Span{buffer.data(), static_cast(current - buffer.cbegin())}; } - return false; + return Span{}; } @@ -151,32 +138,66 @@ std::string &Request::get_buffer() } -Request::ParseResult Request::parse(/* out */ std::string &path) +Request::ParseResult +Request::parse( + /* in */ const Span &stop_if_prefix, + /* out */ std::string &path +) { - if (!cut_crlf(m_buffer)) - return BAD_REQUEST; - - // check and skip scheme if (m_buffer.size() < gemini_scheme.size()) return URL_TOO_SHORT; - if (!is_gemini_scheme(m_buffer)) - return URL_NON_GEMINI; - auto current = m_buffer.cbegin() + gemini_scheme.size(); - // skip domain[:port] - for (; current != m_buffer.cend(); ++current) + std::string::size_type path_from; + std::string::size_type size_without_crlf; { - if (*current == '/') + const auto buffer = cut_crlf(m_buffer); + if (!buffer.size()) + return BAD_REQUEST; + + // check and skip scheme + if (buffer.size() < gemini_scheme.size()) + return URL_TOO_SHORT; + const bool is_gemini_scheme = + std::equal( + gemini_scheme.begin(), + gemini_scheme.end(), + buffer.begin(), + [](char v1, char v2){return std::tolower(v1) == std::tolower(v2);} + ); + if (!is_gemini_scheme) + return URL_NON_GEMINI; + + auto current = buffer.begin() + gemini_scheme.size(); + // skip domain[:port] + for (; current != buffer.end(); ++current) { - ++current; - break; + if (*current == '/') + { + ++current; + break; + } } - } + if (stop_if_prefix.size() && current != buffer.end()) + { + if (stop_if_prefix.size() <= static_cast(buffer.end() - current)) + { + if (std::equal(stop_if_prefix.begin(), stop_if_prefix.end(), current)) + { + m_buffer.swap(path); + return URL_PATH_PREFIX_MATCHED; + } + } + } + + path_from = current - buffer.begin(); + size_without_crlf = buffer.size(); + } + m_buffer.resize(size_without_crlf); const bool is_slash_on_end = (*m_buffer.crbegin() == '/'); // normalize '.' and '..' - const auto ret = PathNormalization{}(current, m_buffer); + const auto ret = PathNormalization{}(m_buffer.begin() + path_from, m_buffer); if (ret == URL_OK) { m_buffer.swap(path); blob - 1359056f3684043421faf449d2d868ff02af7caa blob + 30078df115d1488dfe1a1aec8f7c4023b475e4e7 --- vostok/request.h +++ vostok/request.h @@ -1,5 +1,6 @@ /** Gemini request parser */ +#include "utils.h" #include #pragma once @@ -15,18 +16,25 @@ class Request (public) /* Get buffer for incoming Gemini request */ std::string &get_buffer(); - /* Parse incoming Gemini request - and return normalized path as zero-terminated string (if URL_OK) */ + /* Parse incoming Gemini request and return: + if URL_OK: normalized path as zero-terminated string + if URL_PATH_PREFIX_MATCHED: "raw" Gemini request string + */ enum ParseResult { URL_OK, + URL_PATH_PREFIX_MATCHED, BAD_REQUEST, URL_TOO_SHORT, URL_NON_GEMINI, URL_ROOT_TRAVERSE, }; - ParseResult parse(/* out */ std::string &path); + ParseResult + parse( + /* in */ const Span &stop_if_prefix, + /* out */ std::string &path + ); private: std::string m_buffer; blob - 3ffe1270ee87afe1bc69f3991207856e1f05eaf4 blob + 37162dd8e110c679705509a62f7d62fc46447ccf --- vostok/vostok.cc +++ vostok/vostok.cc @@ -8,6 +8,7 @@ #include "gemini.h" #include +#include #include #include #include @@ -35,8 +36,30 @@ const std::string ROOT{"/"}; } // namespace meta const std::string g_index_gmi{"index.gmi"}; +const auto ERROR42_ANSWER = cut_null("42 Temporary failure\r\n"); +struct ProcessRequestContext +{ + int directory_fd; + const Mime &mime; + const Span &vgi_prefix; + czstring vgi_command; + + ProcessRequestContext( + int directory_fd, + const Mime &mime, + const Span &vgi_prefix, + czstring vgi_command + ) : directory_fd(directory_fd) + , mime(mime) + , vgi_prefix(vgi_prefix) + , vgi_command(vgi_command) + { + } +}; + + bool send_response(NotNull ctx, gemini::Status status, const std::string &meta) { // @@ -52,8 +75,117 @@ bool send_response(NotNull ctx, gemini:: } -void client_thread(const transport::AcceptedClient *accepted_client, int directory_fd, const Mime &mime) +void +process_gateway_request( + const transport::AcceptedClient &accepted_client, + const std::string &path, + const ProcessRequestContext &context +) { + int stdin_pair[2]; + if (pipe(stdin_pair) != 0) + { + error::occurred("Create child stdin", error::Print{}); + send_response(accepted_client.get_ctx(), gemini::STATUS_42_CGI_ERROR, meta::TEMPORARY_FAILURE); + return; + } + UniqueFd stdin_read{stdin_pair[0]}; + UniqueFd stdin_write{stdin_pair[1]}; + + + int stdout_pair[2]; + if (pipe(stdout_pair) != 0) + { + error::occurred("Create child stdout", error::Print{}); + send_response(accepted_client.get_ctx(), gemini::STATUS_42_CGI_ERROR, meta::TEMPORARY_FAILURE); + return; + } + UniqueFd stdout_read{stdout_pair[0]}; + UniqueFd stdout_write{stdout_pair[1]}; + + const auto child_pid = fork(); + if (child_pid == -1) + { + error::occurred("Fork VGI", error::Print{}); + send_response(accepted_client.get_ctx(), gemini::STATUS_42_CGI_ERROR, meta::TEMPORARY_FAILURE); + return; + } + + if (child_pid == 0) + { + // child process (VGI) + if (dup2(stdin_read.get(), STDIN_FILENO) == -1) + { + write(stdout_write.get(), ERROR42_ANSWER.data(), ERROR42_ANSWER.size()); + return; + } + stdin_read.reset(); + stdin_write.reset(); + + if (dup2(stdout_write.get(), STDOUT_FILENO) == -1) + { + write(stdout_write.get(), ERROR42_ANSWER.data(), ERROR42_ANSWER.size()); + return; + } + stdout_read.reset(); + stdout_write.reset(); + + execl(context.vgi_command, context.vgi_command, nullptr); + + // if `execl` return, an error has occurred + write(STDOUT_FILENO, ERROR42_ANSWER.data(), ERROR42_ANSWER.size()); + exit(1); + } + // parent process + + if (write(stdin_write.get(), path.data(), path.size()) == -1) + { + error::occurred("Write to stdin of the child process", error::Print{}); + send_response(accepted_client.get_ctx(), gemini::STATUS_42_CGI_ERROR, meta::TEMPORARY_FAILURE); + return; + } + stdin_read.reset(); + stdin_write.reset(); + + stdout_write.reset(); // make sure entry is closed to get EOF + bool was_write = false; + std::vector buffer; + buffer.resize(64 * 1024); + for (; ; ) + { + const auto ret = read(stdout_read.get(), buffer.data(), buffer.size()); + if (ret == -1) + { + error::occurred("Read from stdout of the child process", error::Print{}); + if (!was_write) + send_response(accepted_client.get_ctx(), gemini::STATUS_42_CGI_ERROR, meta::TEMPORARY_FAILURE); + return; + } + const auto readed = static_cast(ret); + if (readed == 0) + break; // EOF + if (!transport::send(accepted_client.get_ctx(), Span{buffer.data(), readed})) + return; + was_write = true; + } + + int child_status{0}; + for (; ; ) + { + waitpid(child_pid, &child_status, 0); + if (WIFEXITED(child_status)) + break; + } + error::g_log << "VGI command return " << std::dec << WEXITSTATUS(child_status) << std::endl; +} + + +void +client_thread( + const transport::AcceptedClient *accepted_client, + const ProcessRequestContext &context +) +{ assert(accepted_client); std::unique_ptr accepted_client_deleter{accepted_client}; @@ -62,7 +194,7 @@ void client_thread(const transport::AcceptedClient *ac return; std::string path; - const auto parse_result = request.parse(path); + const auto parse_result = request.parse(context.vgi_prefix, path); switch (parse_result) { case Request::BAD_REQUEST: @@ -82,13 +214,17 @@ void client_thread(const transport::AcceptedClient *ac send_response(accepted_client->get_ctx(), gemini::STATUS_50_PERMANENT_FAILURE, meta::ROOT_TRAVERSE); return; + case Request::URL_PATH_PREFIX_MATCHED: + process_gateway_request(*accepted_client, path, context); + return; + case Request::URL_OK: break; } UniqueFd opened_dir; UniqueFd opened_fd; - auto open_file_result = open_file::open(directory_fd, path, opened_fd); + auto open_file_result = open_file::open(context.directory_fd, path, opened_fd); const std::string *opened_path{nullptr}; switch (open_file_result) { @@ -136,7 +272,7 @@ void client_thread(const transport::AcceptedClient *ac } // from mime.types or "text/gemini" (default). - const auto _mime_type = mime.lookup(*opened_path); + const auto _mime_type = context.mime.lookup(*opened_path); const std::string &meta_string = _mime_type ? *_mime_type : meta::TEXT_GEMINI; send_response(accepted_client->get_ctx(), gemini::STATUS_20_SUCCESS, meta_string); @@ -170,7 +306,12 @@ void client_thread(const transport::AcceptedClient *ac } -bool server_loop(int server_socket, NotNullserver_ctx, int directory_fd, const Mime &mime) +bool +server_loop( + int server_socket, + NotNullserver_ctx, + const ProcessRequestContext &context +) { error::g_log << "🚀 Vostok server listening..." << std::endl; for (; ; ) @@ -188,7 +329,7 @@ bool server_loop(int server_socket, NotNull $(cat -)\r"