commit 51254cccedb5bd0fe1e68c8d11c0af9941de9b73 from: Aleksey Ryndin date: Wed Aug 30 08:01:26 2023 UTC Refactoring: parse_url -> extract_url_path commit - 4a354f2fcb6d0403fad19c421b6c2839dddfb227 commit + 51254cccedb5bd0fe1e68c8d11c0af9941de9b73 blob - f5a9446bb7a163bd0e27552e7d77ae26c8e1aaab blob + 3ae18a4e563aa1dbdc476b126a710a87fdc15ad2 --- tests/test_parse_url.cc +++ tests/test_parse_url.cc @@ -7,53 +7,62 @@ namespace vostok { +namespace +{ +template +void init(std::vector &v, const char (&arr)[N], bool cut_null=false) +{ + static_assert(N > 0, "N"); + const auto n = cut_null ? (N - 1) : N; + v.resize(n); + std::copy(&arr[0], &arr[n], v.begin()); +} +} // namespace -TEST_START(test_parse_url) - zs_url_path_t zs_url_path; +TEST_START(test_extract_url_path) + std::vector url, expected; - IS_TRUE(parse_url(cut_null("gemini://host"), zs_url_path) == url_ok - && strcmp(zs_url_path.begin(), "") == 0); +#define CASE_OK(url_literal, path_literal) \ + init(url, url_literal, true); \ + init(expected, path_literal); \ + IS_TRUE(extract_url_path(url) == url_ok && (url.size() == expected.size() && std::equal(url.cbegin(), url.cend(), expected.cbegin()))) - IS_TRUE(parse_url(cut_null("gemini://host/"), zs_url_path) == url_ok - && strcmp(zs_url_path.begin(), "") == 0); + CASE_OK("gemini://host", ""); + CASE_OK("gemini://host/", ""); + CASE_OK("gemini://host/a", "a"); + CASE_OK("gemini://host/a/", "a"); + CASE_OK("gemini://host/a/b", "a/b"); + CASE_OK("gemini://host/a/b/", "a/b"); - IS_TRUE(parse_url(cut_null("gemini://host/a"), zs_url_path) == url_ok - && strcmp(zs_url_path.begin(), "a") == 0); + CASE_OK("gemini://host:1965", ""); + CASE_OK("gemini://host:1965/", ""); + CASE_OK("gemini://host:1965/a", "a"); + CASE_OK("gemini://host:1965/a/", "a"); + CASE_OK("gemini://host:1965/a/b", "a/b"); + CASE_OK("gemini://host:1965/a/b/", "a/b"); - IS_TRUE(parse_url(cut_null("gemini://host/a/b"), zs_url_path) == url_ok - && strcmp(zs_url_path.begin(), "a/b") == 0); + CASE_OK("gemini://host/a/b/../c/./d", "a/c/d"); - IS_TRUE(parse_url(cut_null("gemini://host:port"), zs_url_path) == url_ok - && strcmp(zs_url_path.begin(), "") == 0); + // RFC 3986, 3.1. Scheme + // > ... An implementation + // > should accept uppercase letters as equivalent to lowercase in scheme + // > names (e.g., allow "HTTP" as well as "http") + CASE_OK("GeMiNi://host", ""); - IS_TRUE(parse_url(cut_null("gemini://host:port/"), zs_url_path) == url_ok - && strcmp(zs_url_path.begin(), "") == 0); +#define CASE_ERROR(url_literal, expected_result) \ + init(url, url_literal, true); \ + IS_TRUE(extract_url_path(url) == expected_result) - IS_TRUE(parse_url(cut_null("gemini://host:port/a"), zs_url_path) == url_ok - && strcmp(zs_url_path.begin(), "a") == 0); + CASE_ERROR("", url_too_short); + CASE_ERROR("g", url_too_short); + CASE_ERROR("gemini:/", url_too_short); - IS_TRUE(parse_url(cut_null("gemini://host:port/a/b"), zs_url_path) == url_ok - && strcmp(zs_url_path.begin(), "a/b") == 0); + CASE_ERROR("gemini1://", url_non_gemini); + CASE_ERROR("semini://", url_non_gemini); - IS_TRUE(parse_url(cut_null(""), zs_url_path) == url_too_short); - IS_TRUE(parse_url(cut_null("g"), zs_url_path) == url_too_short); - IS_TRUE(parse_url(cut_null("gemini:/"), zs_url_path) == url_too_short); + CASE_ERROR("gemini://host/../secret.txt", url_root_traverse); + CASE_ERROR("gemini://host/dir/../../secret.txt", url_root_traverse); - IS_TRUE(parse_url(cut_null("gemini1://"), zs_url_path) == url_non_gemini); - IS_TRUE(parse_url(cut_null("semini://"), zs_url_path) == url_non_gemini); - - IS_TRUE(parse_url(cut_null("gemini://host/../secret.txt"), zs_url_path) == url_root_traverse); - IS_TRUE(parse_url(cut_null("gemini://host/dir/../../secret.txt"), zs_url_path) == url_root_traverse); - - IS_TRUE(parse_url(cut_null("gemini://host/a/b/../c/./d"), zs_url_path) == url_ok - && strcmp(zs_url_path.begin(), "a/c/d") == 0); - - // RFC 3986, 3.1. Scheme - // > ... An implementation - // > should accept uppercase letters as equivalent to lowercase in scheme - // > names (e.g., allow "HTTP" as well as "http") - IS_TRUE(parse_url(cut_null("GeMiNi://host"), zs_url_path) == url_ok - && strcmp(zs_url_path.begin(), "") == 0); TEST_END() } // namespace vostok @@ -63,5 +72,5 @@ extern "C" int main(int, char **) { - return vostok::test_parse_url(); + return vostok::test_extract_url_path(); } blob - 986a8632198ea80a95b2590c2c99dfd5ce756a35 blob + 1aaa90926bb3db42ac911c8ea9084866c41cc1a5 --- vostok/parse_url.cc +++ vostok/parse_url.cc @@ -4,6 +4,7 @@ #include "error.h" #include +#include #include @@ -14,7 +15,7 @@ namespace { const auto gemini_scheme = cut_null("gemini://"); -inline bool is_gemini_scheme(span url) +inline bool is_gemini_scheme(const std::vector &url) { return std::equal( gemini_scheme.begin(), @@ -29,7 +30,11 @@ class path_normalization public: using path_components_t = std::list< span >; - url_normalization_result operator() (span url_path, zs_url_path_t &zs_url_path); + url_normalization_result + operator() ( + std::vector::const_iterator p, + std::vector &url + ); protected: url_normalization_result on_component(); @@ -39,7 +44,7 @@ class path_normalization return url_ok; } - void fill(zs_url_path_t &zs_url_path) const; + void fill(std::vector &url) const; private: path_components_t::value_type m_inprogress; @@ -47,19 +52,23 @@ class path_normalization }; -url_normalization_result path_normalization::operator() (span url_path, zs_url_path_t &zs_url_path) +url_normalization_result +path_normalization::operator() ( + std::vector::const_iterator p, + std::vector &url +) { m_inprogress = path_components_t::value_type{nullptr, 0}; m_result.clear(); - for (auto p = url_path.begin(); p != url_path.end(); ++p) + for (; p != url.cend(); ++p) { if (*p != '/') { if (m_inprogress.size()) m_inprogress = decltype(m_inprogress){m_inprogress.begin(), m_inprogress.size() + 1}; else - m_inprogress = decltype(m_inprogress){p, 1}; + m_inprogress = decltype(m_inprogress){&*p, 1}; continue; } @@ -71,7 +80,7 @@ url_normalization_result path_normalization::operator( if (parse_result != url_ok) return parse_result; - fill(zs_url_path); + fill(url); return url_ok; } @@ -95,12 +104,12 @@ url_normalization_result path_normalization::on_compon return on_component_ok(); } -void path_normalization::fill(zs_url_path_t &zs_url_path) const +void path_normalization::fill(std::vector &url) const { - auto current = zs_url_path.begin(); + auto current = url.begin(); for (auto it = m_result.cbegin(); it != m_result.cend(); ++it) { - if (current != zs_url_path.begin()) + if (current != url.begin()) { // non-first path component: insert separator *current = '/'; @@ -109,37 +118,39 @@ void path_normalization::fill(zs_url_path_t &zs_url_pa current = std::copy(it->begin(), it->end(), current); } *current = '\0'; + ++current; + + url.resize(current - url.begin()); } } // namespace -url_normalization_result parse_url(span url, zs_url_path_t &zs_url_path) +url_normalization_result +extract_url_path( + /* in/out */ std::vector &url +) { // check and skip scheme if (url.size() < gemini_scheme.size()) return url_too_short; if (!is_gemini_scheme(url)) return url_non_gemini; - url = url.subspan(gemini_scheme.size()); + auto current = url.cbegin() + gemini_scheme.size(); // skip domain[:port] - const char *current = url.begin(); - for (; current != url.end(); ++current) + for (; current != url.cend(); ++current) { if (*current == '/') { - const auto skip_len = (current + 1) - url.begin(); - url = url.subspan(skip_len); + ++current; break; } } - if (current == url.end()) - url = decltype(url){}; // normalize '.' and '..' path_normalization normalizer; - return normalizer(url, zs_url_path); + return normalizer(current, url); } blob - 8d8b31570d606dd105c6888b6b7546eb51b7e026 blob + eee8490d9b37cbbf3be882e1591c5b26f75531d0 --- vostok/parse_url.h +++ vostok/parse_url.h @@ -3,7 +3,9 @@ #include "utils.h" #include "gemini.h" +#include + #pragma once @@ -23,8 +25,11 @@ enum url_normalization_result /** Zero-terminated path from gemini URL */ using zs_url_path_t = std::array; -/** Extract normalized path from URL as list null-terminated string */ -url_normalization_result parse_url(span url, zs_url_path_t &zs_url_path); +/** Extract normalized path from URL as null-terminated string (inplace) */ +url_normalization_result +extract_url_path( + /* in/out */ std::vector &url +); } // namespace vostok blob - f0cd56105db826dd94ffb4065683264163df99da blob + 51ee360ea9d26a76b2e8f366dbb3d17a9967b0ec --- vostok/transport.cc +++ vostok/transport.cc @@ -18,12 +18,12 @@ using config_t = std::unique_ptr ctx, span &buff) +bool read(not_null ctx, std::vector &buff) { ssize_t ret{}; for (; ; ) { - ret = tls_read(ctx, buff.begin(), buff.size()); + ret = tls_read(ctx, buff.data(), buff.size()); if (ret == TLS_WANT_POLLIN || ret == TLS_WANT_POLLOUT) continue; break; @@ -41,7 +41,7 @@ bool read(not_null ctx, span &buff ); return false; } - buff = buff.first(ret); + buff.resize(ret); return true; } @@ -147,26 +147,29 @@ accepted_client::accepted_client(int server_socket, st } -span read_request(not_null ctx, std::array &buffer) +bool read_request(not_null ctx, std::vector &url) { - span request{buffer}; - if (!read(ctx, request)) - return {}; + // + url.resize(gemini::MAX_REQUEST_LENGTH); + if (!read(ctx, url)) + return false; - for (auto current = request.begin(); current < request.end(); ++current) + for (auto current = url.begin(); current < url.end(); ++current) { const auto next = (current + 1); - if (next == request.end()) + if (next == url.end()) break; if (*current == gemini::CRLF[0] && *next == gemini::CRLF[1]) { // > servers MUST ignore anything sent after the first occurrence of a . - return span{request.begin(), static_cast(current - request.begin())}; + url.resize(current - url.begin()); + return true; } } + error::occurred("Parse request", error::none{}); - return {}; + return false; } blob - b1c2577de7e8aff3a85580b7e0fa96d2226f98cb blob + 22a608f8129218141bf49b9cc068aad1526d29c9 --- vostok/transport.h +++ vostok/transport.h @@ -3,7 +3,7 @@ #include "utils.h" #include "gemini.h" -#include +#include #include #pragma once @@ -44,8 +44,8 @@ class accepted_client (private) }; -/** Read genimi request and return url (empty url - error) */ -span read_request(not_null ctx, std::array &buffer); +/** Read genimi request and return url */ +bool read_request(not_null ctx, std::vector &url); /** Write gemini response */ @@ -57,4 +57,4 @@ bool send(not_null ctx, span } // namespace transport -} // namespace vostok +} // namespace vostok blob - 5e4f404de41c1332628cda9dfd8386191e71d32f blob + a5bb489e81d2f77428c326766a2fdcc78353f61a --- vostok/vostok.cc +++ vostok/vostok.cc @@ -46,16 +46,15 @@ void client_thread(const transport::accepted_client *a assert(accepted_client); std::unique_ptr accepted_client_deleter{accepted_client}; - std::array buffer; - auto url = transport::read_request(accepted_client->get_ctx(), buffer); - if (!url.size()) + std::vector url; + if (!transport::read_request(accepted_client->get_ctx(), url)) { transport::send_response(accepted_client->get_ctx(), gemini::STATUS_59_BAD_REQUEST, meta::bad_request); return; } - zs_url_path_t zs_url_path; - const auto parse_result = parse_url(url, zs_url_path); + + const auto parse_result = extract_url_path(url); switch (parse_result) { case url_too_short: @@ -76,7 +75,7 @@ void client_thread(const transport::accepted_client *a } unique_fd opened_file{}; - const auto open_file_result = open_file(directory_fd, zs_url_path.data(), opened_file); + const auto open_file_result = open_file(directory_fd, url.data(), opened_file); switch (open_file_result) { case file_not_found: @@ -93,15 +92,18 @@ void client_thread(const transport::accepted_client *a // > If is an empty string, the MIME type MUST default to "text/gemini; charset=utf-8". transport::send_response(accepted_client->get_ctx(), gemini::STATUS_20_SUCCESS, {}); + + std::vector buffer; + buffer.resize(64 * 1024); for (; ; ) { const auto ret = read(opened_file.get(), buffer.data(), buffer.size()); if (ret == -1) { error::occurred( - [&zs_url_path] + [&url] { - error::g_log << "Read file \"" << zs_url_path.data() << "\""; + error::g_log << "Read file \"" << url.data() << "\""; }, error::print{} ); @@ -116,7 +118,7 @@ void client_thread(const transport::accepted_client *a if (readed < buffer.size()) break; } - error::g_log << "20 " << "\"" << zs_url_path.data() << "\"" << std::endl; + error::g_log << "20 " << "\"" << url.data() << "\"" << std::endl; }