commit 917c32a4f0ec162835da762d7dd0afdb11d69392 from: Aleksey Ryndin date: Tue Aug 15 15:24:31 2023 UTC Path (from URL) normalization commit - 9a6cb9d51e84cd86ea58319a30046fc2f434246d commit + 917c32a4f0ec162835da762d7dd0afdb11d69392 blob - 860b2a9fb3270c16c3135fc92a26426bbef9a3e7 blob + 49d2831bde02a0492e0c54f8da84be81887302e4 --- Makefile +++ Makefile @@ -7,3 +7,6 @@ clean: server: ${MAKE} -C vostokd + +run_server: server + ./vostokd/vostokd -c cert/server.crt -k cert/server.key blob - b9362c84dc012a844b06927892c48b03ef6817a1 blob + 611e231ad28b76cc73de58ba46028d9a0be60ff0 --- shared/span +++ shared/span @@ -40,6 +40,12 @@ class span (public) return (offset < m_count) ? span{m_p + offset, m_count - offset} : span{}; } + element_type &operator[](size_t idx) const + { + assert(idx < m_count); + return m_p[idx]; + } + private: element_type *m_p; size_t m_count; blob - bbfa867bfda0bfeec12c6582edd249a27ade03a0 blob + 786dcf7a7c39c38a3e601a0c8a7d966355dfc76a --- vostokd/url_normalization.cc +++ vostokd/url_normalization.cc @@ -2,19 +2,112 @@ #include "url_normalization.h" #include "cut_null" +#include "error.h" +#include + namespace vostok { + namespace { + const auto gemini_scheme = cut_null("gemini://"); -} // namespace +class path_normalization +{ +public: + using path_components_t = std::list< span >; -url_normalization_result parse_url(span url, span &path) + url_normalization_result operator() (span url_path, zs_url_path_t &zs_url_path); + +protected: + url_normalization_result on_component(); + url_normalization_result on_component_ok() + { + m_inprogress = decltype(m_inprogress){}; + return url_ok; + } + + void fill(zs_url_path_t &zs_url_path) const; + +private: + path_components_t::value_type m_inprogress; + path_components_t m_result; +}; + + +url_normalization_result path_normalization::operator() (span url_path, zs_url_path_t &zs_url_path) { + m_inprogress = path_components_t::value_type{nullptr, 0}; + m_result.clear(); + + for (auto p = url_path.begin(); p != url_path.end(); ++p) + { + if (*p != '/') + { + if (m_inprogress.size()) + m_inprogress = decltype(m_inprogress){m_inprogress.begin(), m_inprogress.size() + 1}; + else + m_inprogress = decltype(m_inprogress){p, 1}; + continue; + } + + const auto parse_result = on_component(); + if (parse_result != url_ok) + return parse_result; + } + const auto parse_result = on_component(); + if (parse_result != url_ok) + return parse_result; + + fill(zs_url_path); + return url_ok; +} + + +url_normalization_result path_normalization::on_component() +{ + if ((m_inprogress.size() == 0) || (m_inprogress.size() == 1 && m_inprogress[0] == '.')) + { + return on_component_ok(); + } + if (m_inprogress.size() == 2 && m_inprogress[0] == '.' && m_inprogress[1] == '.') + { + if (m_result.empty()) + return url_root_traverse; + + m_result.pop_back(); + return on_component_ok(); + } + + m_result.push_back(std::move(m_inprogress)); + return on_component_ok(); +} + +void path_normalization::fill(zs_url_path_t &zs_url_path) const +{ + auto current = zs_url_path.begin(); + for (auto it = m_result.cbegin(); it != m_result.cend(); ++it) + { + if (current != zs_url_path.begin()) + { + // non-first path component: insert separator + *current = '/'; + ++current; + } + current = std::copy(it->begin(), it->end(), current); + } + *current = '\0'; +} + +} // namespace + + +url_normalization_result parse_url(span url, zs_url_path_t &zs_url_path) +{ // check and skip scheme if (url.size() < gemini_scheme.size()) return url_too_short; @@ -36,8 +129,9 @@ url_normalization_result parse_url(span url, spa if (current == url.end()) url = decltype(url){}; - path = url; - return url_ok; + // normalize '.' and '..' + path_normalization normalizer; + return normalizer(url, zs_url_path); } blob - 9d1800b61158c3a0756ab9deb68803d60e1d92eb blob + 7cda673779438908d7cb51350167a1a8c9d82c4c --- vostokd/url_normalization.h +++ vostokd/url_normalization.h @@ -1,6 +1,7 @@ /** URL normalization */ #include "span" +#include "gemini.h" #pragma once @@ -19,9 +20,11 @@ enum url_normalization_result url_root_traverse, }; +/** Zero-terminated path from gemini URL */ +using zs_url_path_t = std::array; -/** Inplace: extract normalized path from URL */ -url_normalization_result parse_url(span url, span &path); +/** Extract normalized path from URL as list null-terminated string */ +url_normalization_result parse_url(span url, zs_url_path_t &zs_url_path); } // namespace vostok blob - d2bbca5e948ac44e480e5a641162aa6e918408c8 blob + 1feff3e2ae1dc163916656fc132100293d01c78d --- vostokd/vostokd.cc +++ vostokd/vostokd.cc @@ -32,10 +32,17 @@ constexpr span cut_null(const char (&arr)[ namespace meta { -const auto bad_request = cut_null("Bad request"); -const auto url_too_short = cut_null("URL is too short"); -const auto non_gemini = cut_null("No proxying to non-Gemini content"); -const auto root_traverse = cut_null("Wrong traverse"); +const char sz_bad_request[] = "Bad request"; +const auto bad_request = cut_null(sz_bad_request); + +const char sz_url_too_short[] = "URL is too short"; +const auto url_too_short = cut_null(sz_url_too_short); + +const char sz_non_gemini[] = "No proxying to non-Gemini content"; +const auto non_gemini = cut_null(sz_non_gemini); + +const char sz_root_traverse[] = "Wrong traverse"; +const auto root_traverse = cut_null(sz_root_traverse); } // namespace meta @@ -52,17 +59,20 @@ void client_thread(transport::accepted_context::value return; } - span path_from_url; - const auto parse_result = parse_url(url, path_from_url); + zs_url_path_t zs_url_path; + const auto parse_result = parse_url(url, zs_url_path); switch (parse_result) { case url_too_short: + error::occurred("parse URL", []{error::g_log << meta::sz_url_too_short << "." << std::endl;}); transport::send_response(ctx.get_ctx(), gemini::STATUS_59_BAD_REQUEST, meta::url_too_short); return; case url_non_gemini: + error::occurred("parse URL", []{error::g_log << meta::sz_non_gemini << "." << std::endl;}); transport::send_response(ctx.get_ctx(), gemini::STATUS_53_PROXY_REQUEST_REFUSED, meta::non_gemini); return; case url_root_traverse: + error::occurred("parse URL", []{error::g_log << meta::sz_root_traverse << "." << std::endl;}); transport::send_response(ctx.get_ctx(), gemini::STATUS_50_PERMANENT_FAILURE, meta::root_traverse); return; @@ -70,10 +80,7 @@ void client_thread(transport::accepted_context::value break; } - error::g_log << "Requested URL: \""; - for (auto c : path_from_url) - error::g_log << c; - error::g_log << "\"" << std::endl; + error::g_log << "Requested URL: \"" << zs_url_path.data() << "\"" << std::endl; // > If is an empty string, the MIME type MUST default to "text/gemini; charset=utf-8".