diff --git a/include/boost/url/detail/any_segments_iter.hpp b/include/boost/url/detail/any_segments_iter.hpp index 129aee9eb..1d8d65c2a 100644 --- a/include/boost/url/detail/any_segments_iter.hpp +++ b/include/boost/url/detail/any_segments_iter.hpp @@ -71,47 +71,6 @@ struct BOOST_SYMBOL_VISIBLE char const* end) noexcept = 0; }; -//------------------------------------------------ - -// iterates segments in a string -struct BOOST_SYMBOL_VISIBLE - path_iter - : any_segments_iter -{ - virtual ~path_iter() = default; - - explicit - path_iter( - string_view s) noexcept; - -protected: - std::size_t pos_; - std::size_t next_; - - void increment() noexcept; - void rewind() noexcept override; - bool measure(std::size_t&) noexcept override; - void copy(char*&, char const*) noexcept override; -}; - -//------------------------------------------------ - -// iterates segments in an encoded string -struct BOOST_SYMBOL_VISIBLE - path_encoded_iter - : public path_iter -{ - virtual ~path_encoded_iter() = default; - - explicit - path_encoded_iter( - pct_string_view s) noexcept; - -private: - bool measure(std::size_t&) noexcept override; - void copy(char*&, char const*) noexcept override; -}; - //------------------------------------------------ // // segment_iter diff --git a/include/boost/url/detail/impl/any_segments_iter.ipp b/include/boost/url/detail/impl/any_segments_iter.ipp index ee8595bb4..1ad8405cf 100644 --- a/include/boost/url/detail/impl/any_segments_iter.ipp +++ b/include/boost/url/detail/impl/any_segments_iter.ipp @@ -19,194 +19,6 @@ namespace boost { namespace urls { namespace detail { -//------------------------------------------------ -// -// path_iter -// -//------------------------------------------------ - -path_iter:: -path_iter( - string_view s_) noexcept - : any_segments_iter(s_) -{ - rewind(); -} - -void -path_iter:: -increment() noexcept -{ - pos_ = next_; - if(pos_ == s.size()) - { - pos_ = string_view::npos; - return; - } - // skip '/' - ++pos_; - auto const 
end = - s.data() + s.size(); - auto const p0 = - s.data() + pos_; - auto p = p0; - while(p != end) - { - if(*p == '/') - { - next_ = p - s.data(); - return; - } - ++p; - } - next_ = s.size(); -} - -void -path_iter:: -rewind() noexcept -{ - pos_ = 0; - auto p0 = s.data(); - auto const end = p0 + s.size(); - if(p0 != end) - { - fast_nseg = 1; - // skip leading '/' - // of absolute-path - if(*p0 == '/') - { - ++p0; - ++pos_; - if (p0 == end) - { - fast_nseg = 0; - pos_ = string_view::npos; - } - } - auto p = p0; - while(p != end) - { - if(*p == '/') - { - ++fast_nseg; - break; - } - ++p; - } - front = string_view( - p0, p - p0); - next_ = p - s.data(); - } - else - { - pos_ = string_view::npos; - front = { p0, 0 }; - fast_nseg = 0; - } -} - -bool -path_iter:: -measure( - std::size_t& n) noexcept -{ - if(pos_ == string_view::npos) - return false; - encoding_opts opt; - opt.space_as_plus = false; - n += encoded_size( - s.substr( - pos_, - next_ - pos_), - encode_colons ? - nocolon_pchars : - pchars, - opt); - increment(); - return true; -} - -void -path_iter:: -copy( - char*& dest, - char const* end) noexcept -{ - BOOST_ASSERT(pos_ != - string_view::npos); - encoding_opts opt; - opt.space_as_plus = false; - dest += encode( - dest, - end - dest, - s.substr( - pos_, - next_ - pos_), - encode_colons ? - nocolon_pchars : - pchars, - opt); - increment(); -} - -//------------------------------------------------ -// -// path_encoded_iter -// -//------------------------------------------------ - -path_encoded_iter:: -path_encoded_iter( - pct_string_view s) noexcept - : path_iter(s) -{ -} - -bool -path_encoded_iter:: -measure( - std::size_t& n) noexcept -{ - if(pos_ == string_view::npos) - return false; - encoding_opts opt; - opt.space_as_plus = false; - n += detail::re_encoded_size_unsafe( - s.substr( - pos_, - next_ - pos_), - encode_colons ? 
- nocolon_pchars : - pchars, - opt); - increment(); - return true; -} - -void -path_encoded_iter:: -copy( - char*& dest, - char const* end) noexcept -{ - BOOST_ASSERT(pos_ != - string_view::npos); - encoding_opts opt; - opt.space_as_plus = false; - detail::re_encode_unsafe( - dest, - end, - s.substr( - pos_, - next_ - pos_), - encode_colons ? - nocolon_pchars : - pchars, - opt); - increment(); -} - //------------------------------------------------ // // segment_iter diff --git a/include/boost/url/impl/url_base.ipp b/include/boost/url/impl/url_base.ipp index 6b20fc9fe..40f464e9d 100644 --- a/include/boost/url/impl/url_base.ipp +++ b/include/boost/url/impl/url_base.ipp @@ -1088,13 +1088,115 @@ url_base:: set_path( string_view s) { - edit_segments( - detail::segments_iter_impl( - detail::path_ref(impl_)), - detail::segments_iter_impl( - detail::path_ref(impl_), 0), - detail::path_iter(s), - s.starts_with('/')); + op_t op(*this, &s); + encoding_opts opt; + +//------------------------------------------------ +// +// Calculate encoded size +// +// - "/"s are not encoded +// - "%2F"s are not decoded: the input is plain text, so the "%" itself is encoded as "%25" +// +// - reserved path chars are re-encoded +// - colons in first segment might need to be re-encoded +// - the path might need to receive a prefix + auto const n = encoded_size( + s, detail::path_chars, opt); + std::size_t n_reencode_colons = 0; + string_view first_seg; + if (!has_scheme() && + !has_authority() && + !s.starts_with('/')) + { + // the first segment with unencoded colons would look + // like the scheme + first_seg = detail::to_sv(s); + std::size_t p = s.find('/'); + if (p != string_view::npos) + first_seg = s.substr(0, p); + n_reencode_colons = std::count( + first_seg.begin(), first_seg.end(), ':'); + } + // an authority can only be followed by an empty or absolute path + // if we have an authority and the path is a non-empty relative path, we + // add the "/" prefix to make it valid. 
+ bool make_absolute = + has_authority() && + !s.starts_with('/') && + !s.empty(); + // a path starting with "//" might look like the authority. + // we add a "/." prefix to prevent that + bool add_dot_segment = + !make_absolute && + s.starts_with("//"); + +//------------------------------------------------ +// +// Re-encode data +// + auto dest = set_path_impl( + n + make_absolute + 2 * n_reencode_colons + 2 * add_dot_segment, op); + impl_.decoded_[id_path] = 0; + if (!dest) + { + impl_.nseg_ = 0; + return *this; + } + if (make_absolute) + { + *dest++ = '/'; + impl_.decoded_[id_path] += 1; + } + else if (add_dot_segment) + { + *dest++ = '/'; + *dest++ = '.'; + impl_.decoded_[id_path] += 2; + } + dest += encode_unsafe( + dest, + impl_.get(id_query).data() - dest, + first_seg, + detail::segment_chars - ':', + opt); + dest += encode_unsafe( + dest, + impl_.get(id_query).data() - dest, + s.substr(first_seg.size()), + detail::path_chars, + opt); + impl_.decoded_[id_path] += s.size(); + BOOST_ASSERT(!dest || dest == impl_.get(id_query).data()); + BOOST_ASSERT( + impl_.decoded_[id_path] == + s.size() + make_absolute + 2 * add_dot_segment); + +//------------------------------------------------ +// +// Update path parameters +// + // derive the segment count from the input path s, not from the encoded buffer + if (s == "/") + { + // "/" maps to sequence {} + impl_.nseg_ = 0; + } + else if (!s.empty()) + { + if (s.starts_with("/./")) + s = s.substr(2); + // count segments as number of '/'s + 1 + impl_.nseg_ = std::count( + s.begin() + 1, s.end(), '/') + 1; + } + else + { + // an empty relative path maps to sequence {} + impl_.nseg_ = 0; + } + + check_invariants(); return *this; } @@ -1103,13 +1205,113 @@ url_base:: set_encoded_path( pct_string_view s) { - edit_segments( - detail::segments_iter_impl( - detail::path_ref(impl_)), - detail::segments_iter_impl( - detail::path_ref(impl_), 0), - detail::path_iter(s), - s.starts_with('/')); + op_t op(*this, &detail::ref(s)); + encoding_opts opt; 
+ +//------------------------------------------------ +// +// Calculate re-encoded output size +// +// - reserved path chars are re-encoded +// - colons in first segment might need to be re-encoded +// - the path might need to receive a prefix + auto const n = detail::re_encoded_size_unsafe( + s, detail::path_chars, opt); + std::size_t n_reencode_colons = 0; + string_view first_seg; + if (!has_scheme() && + !has_authority() && + !s.starts_with('/')) + { + // the first segment with unencoded colons would look + // like the scheme + first_seg = detail::to_sv(s); + std::size_t p = s.find('/'); + if (p != string_view::npos) + first_seg = s.substr(0, p); + n_reencode_colons = std::count( + first_seg.begin(), first_seg.end(), ':'); + } + // an authority can only be followed by an empty or absolute path + // if we have an authority and the path is a non-empty relative path, we + // add the "/" prefix to make it valid. + bool make_absolute = + has_authority() && + !s.starts_with('/') && + !s.empty(); + // a path starting with "//" might look like the authority + // we add a "/." 
prefix to prevent that + bool add_dot_segment = + !make_absolute && + s.starts_with("//"); + +//------------------------------------------------ +// +// Re-encode data +// + auto dest = set_path_impl( + n + make_absolute + 2 * n_reencode_colons + 2 * add_dot_segment, op); + impl_.decoded_[id_path] = 0; + if (!dest) + { + impl_.nseg_ = 0; + return *this; + } + if (make_absolute) + { + *dest++ = '/'; + impl_.decoded_[id_path] += 1; + } + else if (add_dot_segment) + { + *dest++ = '/'; + *dest++ = '.'; + impl_.decoded_[id_path] += 2; + } + impl_.decoded_[id_path] += + detail::re_encode_unsafe( + dest, + impl_.get(id_query).data(), + first_seg, + detail::segment_chars - ':', + opt); + impl_.decoded_[id_path] += + detail::re_encode_unsafe( + dest, + impl_.get(id_query).data(), + s.substr(first_seg.size()), + detail::path_chars, + opt); + BOOST_ASSERT(dest == impl_.get(id_query).data()); + BOOST_ASSERT( + impl_.decoded_[id_path] == + s.decoded_size() + make_absolute + 2 * add_dot_segment); + +//------------------------------------------------ +// +// Update path parameters +// + // derive the segment count from the input path s, not from the encoded buffer + if (s == "/") + { + // "/" maps to sequence {} + impl_.nseg_ = 0; + } + else if (!s.empty()) + { + if (s.starts_with("/./")) + s = s.substr(2); + // count segments as number of '/'s + 1 + impl_.nseg_ = std::count( + s.begin() + 1, s.end(), '/') + 1; + } + else + { + // an empty relative path maps to sequence {} + impl_.nseg_ = 0; + } + + check_invariants(); return *this; } @@ -1563,7 +1765,8 @@ normalize_path() skip_dot = 0; } else if ( - !has_scheme()) + !has_scheme() && + !has_authority()) { if (p.starts_with("./")) { @@ -2045,6 +2248,19 @@ set_port_impl( return dest + 3; } +char* +url_base:: +set_path_impl( + std::size_t n, + op_t& op) +{ + check_invariants(); + auto const dest = resize_impl( + id_path, n, op); + return dest; +} + + //------------------------------------------------ // return the first segment of the path. 
diff --git a/include/boost/url/url_base.hpp b/include/boost/url/url_base.hpp index bb69592d5..0d4fc28a0 100644 --- a/include/boost/url/url_base.hpp +++ b/include/boost/url/url_base.hpp @@ -1703,6 +1703,14 @@ class BOOST_SYMBOL_VISIBLE to ensure that no other parts of the url is semantically affected. + @note + This function does not encode '/' chars, which + are unreserved for paths but reserved for + path segments. If a path segment should include + encoded '/'s to differentiate it from path separators, + the functions @ref set_encoded_path or @ref segments + should be used instead. + @par Example @code url u( "http://www.example.com" ); @@ -2809,6 +2817,7 @@ class BOOST_SYMBOL_VISIBLE char* set_userinfo_impl(std::size_t n, op_t& op); char* set_host_impl(std::size_t n, op_t& op); char* set_port_impl(std::size_t n, op_t& op); + char* set_path_impl(std::size_t n, op_t& op); string_view first_segment() const noexcept; diff --git a/test/unit/url.cpp b/test/unit/url.cpp index 6c3182827..368b4a7ef 100644 --- a/test/unit/url.cpp +++ b/test/unit/url.cpp @@ -313,11 +313,13 @@ struct url_test { auto ok = [](string_view u0, string_view p) { - urls::url u(u0); - u.set_path(p); - BOOST_TEST_CSTR_EQ(u.buffer(), p); - u.normalize(); - BOOST_TEST_CSTR_EQ(u.buffer(), p); + urls::url u(u0); + u.set_encoded_path(p); + BOOST_TEST_CSTR_EQ(u.buffer(), p); + u.set_path(p); + BOOST_TEST_CSTR_EQ(u.buffer(), p); + u.normalize(); + BOOST_TEST_CSTR_EQ(u.buffer(), p); }; ok("/", "/"); ok("/", ""); @@ -344,27 +346,28 @@ struct url_test // empty url u = parse_uri("x://y/path/to/file.txt?q#f").value(); u.set_encoded_path(""); - BOOST_TEST_EQ(u.encoded_path(), ""); - BOOST_TEST_EQ(u.buffer(), "x://y?q#f"); + BOOST_TEST_CSTR_EQ(u.encoded_path(), ""); + BOOST_TEST_CSTR_EQ(u.buffer(), "x://y?q#f"); } { // path-abempty url u = parse_uri("x://y/path/to/file.txt?q#f").value(); u.set_encoded_path("/x"); - BOOST_TEST_EQ(u.encoded_path(), "/x"); - BOOST_TEST_EQ(u.buffer(), "x://y/x?q#f"); + 
BOOST_TEST_CSTR_EQ(u.encoded_path(), "/x"); + BOOST_TEST_CSTR_EQ(u.buffer(), "x://y/x?q#f"); u.set_encoded_path("x/"); - BOOST_TEST_EQ(u.buffer(), "x://y/x/?q#f"); + BOOST_TEST_CSTR_EQ(u.buffer(), "x://y/x/?q#f"); } { // path-absolute url u = parse_relative_ref("/path/to/file.txt").value(); u.set_encoded_path("/home/file.txt"); - BOOST_TEST_EQ(u.encoded_path(), "/home/file.txt"); - BOOST_TEST_EQ(u.buffer(), "/home/file.txt"); + BOOST_TEST_CSTR_EQ(u.encoded_path(), "/home/file.txt"); + BOOST_TEST_CSTR_EQ(u.buffer(), "/home/file.txt"); u.set_encoded_path("//home/file.txt"); + BOOST_TEST_CSTR_EQ(u.buffer(), "/.//home/file.txt"); equal(u, { "", "home", "file.txt" }); - BOOST_TEST_EQ(u.encoded_path(), "/.//home/file.txt"); + BOOST_TEST_CSTR_EQ(u.encoded_path(), "/.//home/file.txt"); BOOST_TEST_THROWS(u.set_encoded_path("/home/%ile.txt"), system_error); } @@ -372,26 +375,26 @@ struct url_test // path-rootless url u = parse_uri("x:mailto").value(); u.set_encoded_path("file.txt"); - BOOST_TEST_EQ(u.encoded_path(), "file.txt"); - BOOST_TEST_EQ(u.buffer(), "x:file.txt"); + BOOST_TEST_CSTR_EQ(u.encoded_path(), "file.txt"); + BOOST_TEST_CSTR_EQ(u.buffer(), "x:file.txt"); u.set_encoded_path(":file.txt"); - BOOST_TEST_EQ(u.encoded_path(), ":file.txt"); - BOOST_TEST_EQ(u.buffer(), "x::file.txt"); + BOOST_TEST_CSTR_EQ(u.encoded_path(), ":file.txt"); + BOOST_TEST_CSTR_EQ(u.buffer(), "x::file.txt"); // to path-absolute u.set_encoded_path("/file.txt"); - BOOST_TEST_EQ(u.encoded_path(), "/file.txt"); - BOOST_TEST_EQ(u.buffer(), "x:/file.txt"); + BOOST_TEST_CSTR_EQ(u.encoded_path(), "/file.txt"); + BOOST_TEST_CSTR_EQ(u.buffer(), "x:/file.txt"); } { // path-noscheme url u = parse_relative_ref("mailto").value(); u.set_encoded_path("file.txt"); - BOOST_TEST_EQ(u.encoded_path(), "file.txt"); - BOOST_TEST_EQ(u.buffer(), "file.txt"); + BOOST_TEST_CSTR_EQ(u.encoded_path(), "file.txt"); + BOOST_TEST_CSTR_EQ(u.buffer(), "file.txt"); u.set_encoded_path(":file.txt"); - 
BOOST_TEST_EQ(u.encoded_path(), "%3Afile.txt"); + BOOST_TEST_CSTR_EQ(u.encoded_path(), "%3Afile.txt"); u.set_encoded_path("http:index.htm"); - BOOST_TEST_EQ(u.encoded_path(), "http%3Aindex.htm"); + BOOST_TEST_CSTR_EQ(u.encoded_path(), "http%3Aindex.htm"); } // set_encoded_path @@ -445,7 +448,7 @@ struct url_test { url u = parse_uri_reference(s0).value(); u.set_path(arg); - BOOST_TEST_EQ(u.buffer(), match); + BOOST_TEST_CSTR_EQ(u.buffer(), match); }; check( "", @@ -459,6 +462,10 @@ struct url_test "", "/path/to/file.txt", "/path/to/file.txt"); + check( + "", + "/path%2Fto%2Ffile.txt", + "/path%252Fto%252Ffile.txt"); check( "", "//index.htm", @@ -564,23 +571,23 @@ struct url_test url u; u.set_encoded_fragment(""); BOOST_TEST(u.has_fragment()); - BOOST_TEST_EQ(u.buffer(), "#"); - BOOST_TEST_EQ(u.encoded_fragment(), ""); + BOOST_TEST_CSTR_EQ(u.buffer(), "#"); + BOOST_TEST_CSTR_EQ(u.encoded_fragment(), ""); } { url u; u.set_encoded_fragment("x"); BOOST_TEST(u.has_fragment()); - BOOST_TEST_EQ(u.buffer(), "#x"); - BOOST_TEST_EQ(u.encoded_fragment(), "x"); + BOOST_TEST_CSTR_EQ(u.buffer(), "#x"); + BOOST_TEST_CSTR_EQ(u.encoded_fragment(), "x"); } { url u; u.set_encoded_fragment("%41"); BOOST_TEST(u.has_fragment()); - BOOST_TEST_EQ(u.buffer(), "#%41"); - BOOST_TEST_EQ(u.encoded_fragment(), "%41"); - BOOST_TEST_EQ(u.fragment(), "A"); + BOOST_TEST_CSTR_EQ(u.buffer(), "#%41"); + BOOST_TEST_CSTR_EQ(u.encoded_fragment(), "%41"); + BOOST_TEST_CSTR_EQ(u.fragment(), "A"); } { url u; @@ -601,9 +608,9 @@ struct url_test url u; u.set_fragment(f); BOOST_TEST(u.has_fragment()); - BOOST_TEST_EQ(u.buffer(), h); - BOOST_TEST_EQ(u.encoded_fragment(), ef); - BOOST_TEST_EQ(u.fragment(), f); + BOOST_TEST_CSTR_EQ(u.buffer(), h); + BOOST_TEST_CSTR_EQ(u.encoded_fragment(), ef); + BOOST_TEST_CSTR_EQ(u.fragment(), f); }; good("", "#", ""); @@ -656,7 +663,7 @@ struct url_test f(u); equal(u.segments(), init); equal(u.encoded_segments(), init); - BOOST_TEST_EQ(u.buffer(), s1); + 
BOOST_TEST_CSTR_EQ(u.buffer(), s1); } template @@ -675,7 +682,7 @@ struct url_test f(u); equal(u.segments(), dec_init); equal(u.encoded_segments(), enc_init); - BOOST_TEST_EQ(u.buffer(), s1); + BOOST_TEST_CSTR_EQ(u.buffer(), s1); } void @@ -729,7 +736,7 @@ struct url_test u.segments() = init; equal(u.segments(), init); //equal(u.encoded_segments(), init); - BOOST_TEST_EQ(u.buffer(), s1); + BOOST_TEST_CSTR_EQ(u.buffer(), s1); } }; @@ -839,14 +846,14 @@ struct url_test result rv = resolve(ub, ur, u); if(! BOOST_TEST( rv.has_value() )) return; - BOOST_TEST_EQ(u.buffer(), m); + BOOST_TEST_CSTR_EQ(u.buffer(), m); // in place resolution url base( ub ); rv = base.resolve( ur ); if(! BOOST_TEST( rv.has_value() )) return; - BOOST_TEST_EQ(base.buffer(), m); + BOOST_TEST_CSTR_EQ(base.buffer(), m); }; check("g:h" , "g:h");