diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc index 63aedcccb8f..0cea33ac9ee 100644 --- a/proxy/hdrs/URL.cc +++ b/proxy/hdrs/URL.cc @@ -1859,6 +1859,16 @@ url_CryptoHash_get_general(const URLImpl *url, CryptoContext &ctx, CryptoHash &h while (t < ends[i]) { if ((i == 0) || (i == 6)) { // scheme and host unescape_str_tolower(p, e, t, ends[i], s); + } else if (i == 8 || i == 10 || i == 12) { // path, params, query + // Don't unescape the parts of the URI that are processed by the + // origin since it may behave differently based upon whether these are + // escaped or not. Therefore differently encoded strings should be + // cached separately via differentiated hashes. + int path_len = ends[i] - t; + int min_len = std::min(path_len, static_cast(e - p)); + memcpy(p, t, min_len); + p += min_len; + t += min_len; } else { unescape_str(p, e, t, ends[i], s); } diff --git a/proxy/hdrs/unit_tests/test_URL.cc b/proxy/hdrs/unit_tests/test_URL.cc index bb1886d5800..a5741839ef7 100644 --- a/proxy/hdrs/unit_tests/test_URL.cc +++ b/proxy/hdrs/unit_tests/test_URL.cc @@ -23,6 +23,7 @@ #include "catch.hpp" #include "URL.h" +#include "tscore/CryptoHash.h" TEST_CASE("ValidateURL", "[proxy][validurl]") { @@ -562,3 +563,129 @@ TEST_CASE("UrlParse", "[proxy][parseurl]") test_parse(test_case, URL_PARSE_REGEX); } } + +struct get_hash_test_case { + const std::string description; + const std::string uri_1; + const std::string uri_2; + const bool ignore_query; + const bool has_equal_hash; +}; + +constexpr bool HAS_EQUAL_HASH = true; +constexpr bool IGNORE_QUERY = true; + +// clang-format off +std::vector get_hash_test_cases = { + { + "No encoding: equal hashes", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + !IGNORE_QUERY, + HAS_EQUAL_HASH, + }, + { + "Scheme encoded: equal hashes", + "http%3C://one.example.com/a/path?name=value#some=value?with_question#fragment", + "http<://one.example.com/a/path?name=value#some=value?with_question#fragment", + !IGNORE_QUERY, + HAS_EQUAL_HASH, + }, + { + "Host encoded: equal hashes", + "http://one%2Eexample.com/a/path?name=value#some=value?with_question#fragment", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + !IGNORE_QUERY, + HAS_EQUAL_HASH, + }, + { + "Path encoded: differing hashes", + "http://one.example.com/a%2Fpath?name=value#some=value?with_question#fragment", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + !IGNORE_QUERY, + !HAS_EQUAL_HASH, + }, + { + "Query = encoded: differing hashes", + "http://one.example.com/a/path?name%3Dvalue#some=value?with_question#fragment", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + !IGNORE_QUERY, + !HAS_EQUAL_HASH, + }, + { + "Query = encoded but ignore_query: equal hashes", + "http://one.example.com/a/path?name%3Dvalue#some=value?with_question#fragment", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + IGNORE_QUERY, + HAS_EQUAL_HASH, + }, + { + "Query internal encoded: differing hashes", + "http://one.example.com/a/path?name=valu%5D#some=value?with_question#fragment", + "http://one.example.com/a/path?name=valu]#some=value?with_question#fragment", + !IGNORE_QUERY, + !HAS_EQUAL_HASH, + }, + { + "Query internal encoded but ignore_query: equal hashes", + "http://one.example.com/a/path?name=valu%5D#some=value?with_question#fragment", + "http://one.example.com/a/path?name=valu]#some=value?with_question#fragment", + IGNORE_QUERY, + HAS_EQUAL_HASH, + }, + { + "Fragment encoded: fragment is not part of the hash", + "http://one.example.com/a/path?name=value#some=value?with_question#frag%7Dent", + "http://one.example.com/a/path?name=value#some=value?with_question/frag}ent", + !IGNORE_QUERY, + HAS_EQUAL_HASH, + }, + { + "Username encoded: equal hashes", + "mysql://my%7Eser:mypassword@localhost/mydatabase", + "mysql://my~ser:mypassword@localhost/mydatabase", + !IGNORE_QUERY, + HAS_EQUAL_HASH, + }, + { + "Password encoded: equal hashes", + "mysql://myuser:mypa%24sword@localhost/mydatabase", + "mysql://myuser:mypa$sword@localhost/mydatabase", + !IGNORE_QUERY, + HAS_EQUAL_HASH, + }, +}; + +/** Return the hash related to a URI. + * + * @param[in] uri The URI to hash. + * @return The hash of the URI. + */ +CryptoHash +get_hash(const std::string &uri, bool ignore_query) +{ + URL url; + HdrHeap *heap = new_HdrHeap(); + url.create(heap); + url.parse(uri); + CryptoHash hash; + url.hash_get(&hash, ignore_query); + heap->destroy(); + return hash; +} + +TEST_CASE("UrlHashGet", "[url][hash_get]") +{ + for (auto const &test_case : get_hash_test_cases) { + std::string description = test_case.description + ": " + test_case.uri_1 + " vs " + test_case.uri_2; + SECTION(description) { + CryptoHash hash1 = get_hash(test_case.uri_1, test_case.ignore_query); + CryptoHash hash2 = get_hash(test_case.uri_2, test_case.ignore_query); + if (test_case.has_equal_hash) { + CHECK(hash1 == hash2); + } else { + CHECK(hash1 != hash2); + } + } + } +} diff --git a/tests/gold_tests/url/uri.replay.yaml b/tests/gold_tests/url/uri.replay.yaml index 4935ff7951e..d6689db11fb 100644 --- a/tests/gold_tests/url/uri.replay.yaml +++ b/tests/gold_tests/url/uri.replay.yaml @@ -33,12 +33,20 @@ meta: - [ Content-Length, 16 ] - [ Cache-Control, max-age=300 ] + - 404_ok_response: &404_ok_response + server-response: + status: 404 + reason: "Not Found" + headers: + fields: + - [ Content-Length, 16 ] + - [ Cache-Control, max-age=300 ] + sessions: - transactions: # A simply path - - all: { headers: { fields: [[ uuid, 1 ]]}} - client-request: + - client-request: method: "GET" version: "1.1" scheme: "http" @@ -46,6 +54,7 @@ sessions: headers: fields: - [ Host, example.com ] + - [ uuid, 1 ] <<: *200_ok_response @@ -54,8 +63,7 @@ sessions: # An initial // without an authority is not valid per RFC 3986 section-3.3, # but we have historically accepted it and will continue to do so. - - all: { headers: { fields: [[ uuid, 2 ]]}} - client-request: + - client-request: method: "GET" version: "1.1" scheme: "http" @@ -63,6 +71,7 @@ sessions: headers: fields: - [ Host, example.com ] + - [ uuid, 2 ] <<: *200_ok_response @@ -70,8 +79,7 @@ sessions: status: 200 # A '^' is an invalid path. - - all: { headers: { fields: [[ uuid, 3 ]]}} - client-request: + - client-request: method: "GET" version: "1.1" scheme: "http" @@ -79,8 +87,99 @@ sessions: headers: fields: - [ Host, example.com ] + - [ uuid, 3 ] <<: *200_ok_response proxy-response: status: 400 + +- transactions: + # + # Make sure that we consistently treat encoding of paths. + # + - client-request: + method: "GET" + version: "1.1" + url: /path%2ffoo + headers: + fields: + - [ Host, test.com ] + - [ Connection, keep-alive ] + - [ uuid, encoded_path ] + + proxy-request: + # Verify that the origin gets the encoded path. + url: /path%2ffoo + + <<: *404_ok_response + + proxy-response: + status: 404 + + # Request with an unencoded path: go to origin. + - client-request: + # Give plenty of time to cache the response. + delay: 200ms + + method: "GET" + version: "1.1" + # Note that the path is not encoded. + url: /path/foo + headers: + fields: + - [ Host, test.com ] + - [ Connection, keep-alive ] + - [ uuid, unencoded_path ] + + proxy-request: + url: /path/foo + + <<: *200_ok_response + + proxy-response: + # ATS should treat the unencoded path as different and go to the origin, + # not serve the encoded path out of cache. + status: 200 + + # Request again with an encoded path, should be served from cache. + - client-request: + method: "GET" + version: "1.1" + url: /path%2ffoo + headers: + fields: + - [ Host, test.com ] + - [ Connection, keep-alive ] + - [ uuid, encoded_path_again ] + + # The origin should not receive this request, but if it does, reply with a + # non-404 response so we can detect it. + <<: *200_ok_response + + proxy-response: + # ATS should serve the encoded path response from cache. + status: 404 + + # Request again with an encoded path, should be served from cache. + - client-request: + # Give plenty of time to cache the unencoded response. + delay: 200ms + + method: "GET" + version: "1.1" + # Note that the path is not encoded. + url: /path/foo + headers: + fields: + - [ Host, test.com ] + - [ Connection, keep-alive ] + - [ uuid, unencoded_path_again ] + + # The origin should not receive this request, but if it does, reply with a + # non-200 response so we can detect it. + <<: *404_ok_response + + proxy-response: + # Verify that ATS returns out of cache for the uneencoded path object. + status: 200 diff --git a/tests/gold_tests/url/uri.test.py b/tests/gold_tests/url/uri.test.py index 8589d07f0f3..f1ccd77e009 100644 --- a/tests/gold_tests/url/uri.test.py +++ b/tests/gold_tests/url/uri.test.py @@ -26,7 +26,7 @@ server = Test.MakeVerifierServerProcess("server", replay_file) ts.Disk.records_config.update({ 'proxy.config.diags.debug.enabled': 1, - 'proxy.config.diags.debug.tags': 'http', + 'proxy.config.diags.debug.tags': 'http|cache|url', }) ts.Disk.remap_config.AddLine( 'map / http://127.0.0.1:{0}'.format(server.Variables.http_port) @@ -34,4 +34,8 @@ tr = Test.AddTestRun("Verify correct URI parsing behavior.") tr.Processes.Default.StartBefore(server) tr.Processes.Default.StartBefore(ts) -tr.AddVerifierClientProcess("client", replay_file, http_ports=[ts.Variables.port]) +tr.AddVerifierClientProcess( + "client", + replay_file, + http_ports=[ts.Variables.port], + other_args='--thread-limit 1')