Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions proxy/hdrs/URL.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1859,6 +1859,16 @@ url_CryptoHash_get_general(const URLImpl *url, CryptoContext &ctx, CryptoHash &h
while (t < ends[i]) {
if ((i == 0) || (i == 6)) { // scheme and host
unescape_str_tolower(p, e, t, ends[i], s);
} else if (i == 8 || i == 10 || i == 12) { // path, params, query
// Don't unescape the parts of the URI that are processed by the
// origin since it may behave differently based upon whether these are
// escaped or not. Therefore differently encoded strings should be
// cached separately via differentiated hashes.
int path_len = ends[i] - t;
int min_len = std::min(path_len, static_cast<int>(e - p));
memcpy(p, t, min_len);
p += min_len;
t += min_len;
} else {
unescape_str(p, e, t, ends[i], s);
}
Expand Down
127 changes: 127 additions & 0 deletions proxy/hdrs/unit_tests/test_URL.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "catch.hpp"

#include "URL.h"
#include "tscore/CryptoHash.h"

TEST_CASE("ValidateURL", "[proxy][validurl]")
{
Expand Down Expand Up @@ -562,3 +563,129 @@ TEST_CASE("UrlParse", "[proxy][parseurl]")
test_parse(test_case, URL_PARSE_REGEX);
}
}

struct get_hash_test_case {
const std::string description;
const std::string uri_1;
const std::string uri_2;
const bool ignore_query;
const bool has_equal_hash;
};

constexpr bool HAS_EQUAL_HASH = true;
constexpr bool IGNORE_QUERY = true;

// clang-format off
std::vector<get_hash_test_case> get_hash_test_cases = {
{
"No encoding: equal hashes",
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
!IGNORE_QUERY,
HAS_EQUAL_HASH,
},
{
"Scheme encoded: equal hashes",
"http%3C://one.example.com/a/path?name=value#some=value?with_question#fragment",
"http<://one.example.com/a/path?name=value#some=value?with_question#fragment",
!IGNORE_QUERY,
HAS_EQUAL_HASH,
},
{
"Host encoded: equal hashes",
"http://one%2Eexample.com/a/path?name=value#some=value?with_question#fragment",
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
!IGNORE_QUERY,
HAS_EQUAL_HASH,
},
{
"Path encoded: differing hashes",
"http://one.example.com/a%2Fpath?name=value#some=value?with_question#fragment",
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
!IGNORE_QUERY,
!HAS_EQUAL_HASH,
},
{
"Query = encoded: differing hashes",
"http://one.example.com/a/path?name%3Dvalue#some=value?with_question#fragment",
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
!IGNORE_QUERY,
!HAS_EQUAL_HASH,
},
{
"Query = encoded but ignore_query: equal hashes",
"http://one.example.com/a/path?name%3Dvalue#some=value?with_question#fragment",
"http://one.example.com/a/path?name=value#some=value?with_question#fragment",
IGNORE_QUERY,
HAS_EQUAL_HASH,
},
{
"Query internal encoded: differing hashes",
"http://one.example.com/a/path?name=valu%5D#some=value?with_question#fragment",
"http://one.example.com/a/path?name=valu]#some=value?with_question#fragment",
!IGNORE_QUERY,
!HAS_EQUAL_HASH,
},
{
"Query internal encoded but ignore_query: equal hashes",
"http://one.example.com/a/path?name=valu%5D#some=value?with_question#fragment",
"http://one.example.com/a/path?name=valu]#some=value?with_question#fragment",
IGNORE_QUERY,
HAS_EQUAL_HASH,
},
{
"Fragment encoded: fragment is not part of the hash",
"http://one.example.com/a/path?name=value#some=value?with_question#frag%7Dent",
"http://one.example.com/a/path?name=value#some=value?with_question/frag}ent",
!IGNORE_QUERY,
HAS_EQUAL_HASH,
},
{
"Username encoded: equal hashes",
"mysql://my%7Eser:mypassword@localhost/mydatabase",
"mysql://my~ser:mypassword@localhost/mydatabase",
!IGNORE_QUERY,
HAS_EQUAL_HASH,
},
{
"Password encoded: equal hashes",
"mysql://myuser:mypa%24sword@localhost/mydatabase",
"mysql://myuser:mypa$sword@localhost/mydatabase",
!IGNORE_QUERY,
HAS_EQUAL_HASH,
},
};

/** Return the hash related to a URI.
*
* @param[in] uri The URI to hash.
* @return The hash of the URI.
*/
CryptoHash
get_hash(const std::string &uri, bool ignore_query)
{
URL url;
HdrHeap *heap = new_HdrHeap();
url.create(heap);
url.parse(uri);
CryptoHash hash;
url.hash_get(&hash, ignore_query);
heap->destroy();
return hash;
}

TEST_CASE("UrlHashGet", "[url][hash_get]")
{
for (auto const &test_case : get_hash_test_cases) {
std::string description = test_case.description + ": " + test_case.uri_1 + " vs " + test_case.uri_2;
SECTION(description) {
CryptoHash hash1 = get_hash(test_case.uri_1, test_case.ignore_query);
CryptoHash hash2 = get_hash(test_case.uri_2, test_case.ignore_query);
if (test_case.has_equal_hash) {
CHECK(hash1 == hash2);
} else {
CHECK(hash1 != hash2);
}
}
}
}
111 changes: 105 additions & 6 deletions tests/gold_tests/url/uri.replay.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,28 @@ meta:
- [ Content-Length, 16 ]
- [ Cache-Control, max-age=300 ]

- 404_ok_response: &404_ok_response
server-response:
status: 404
reason: "Not Found"
headers:
fields:
- [ Content-Length, 16 ]
- [ Cache-Control, max-age=300 ]

sessions:
- transactions:

# A simply path
- all: { headers: { fields: [[ uuid, 1 ]]}}
client-request:
- client-request:
method: "GET"
version: "1.1"
scheme: "http"
url: /index.html
headers:
fields:
- [ Host, example.com ]
- [ uuid, 1 ]

<<: *200_ok_response

Expand All @@ -54,33 +63,123 @@ sessions:

# An initial // without an authority is not valid per RFC 3986 section-3.3,
# but we have historically accepted it and will continue to do so.
- all: { headers: { fields: [[ uuid, 2 ]]}}
client-request:
- client-request:
method: "GET"
version: "1.1"
scheme: "http"
url: //index.html
headers:
fields:
- [ Host, example.com ]
- [ uuid, 2 ]

<<: *200_ok_response

proxy-response:
status: 200

# A '^' is an invalid path.
- all: { headers: { fields: [[ uuid, 3 ]]}}
client-request:
- client-request:
method: "GET"
version: "1.1"
scheme: "http"
url: ^
headers:
fields:
- [ Host, example.com ]
- [ uuid, 3 ]

<<: *200_ok_response

proxy-response:
status: 400

- transactions:
#
# Make sure that we consistently treat encoding of paths.
#
- client-request:
method: "GET"
version: "1.1"
url: /path%2ffoo
headers:
fields:
- [ Host, test.com ]
- [ Connection, keep-alive ]
- [ uuid, encoded_path ]

proxy-request:
# Verify that the origin gets the encoded path.
url: /path%2ffoo

<<: *404_ok_response

proxy-response:
status: 404

# Request with an unencoded path: go to origin.
- client-request:
# Give plenty of time to cache the response.
delay: 200ms

method: "GET"
version: "1.1"
# Note that the path is not encoded.
url: /path/foo
headers:
fields:
- [ Host, test.com ]
- [ Connection, keep-alive ]
- [ uuid, unencoded_path ]

proxy-request:
url: /path/foo

<<: *200_ok_response

proxy-response:
# ATS should treat the unencoded path as different and go to the origin,
# not serve the encoded path out of cache.
status: 200

# Request again with an encoded path, should be served from cache.
- client-request:
method: "GET"
version: "1.1"
url: /path%2ffoo
headers:
fields:
- [ Host, test.com ]
- [ Connection, keep-alive ]
- [ uuid, encoded_path_again ]

# The origin should not receive this request, but if it does, reply with a
# non-404 response so we can detect it.
<<: *200_ok_response

proxy-response:
# ATS should serve the encoded path response from cache.
status: 404

# Request again with an encoded path, should be served from cache.
- client-request:
# Give plenty of time to cache the unencoded response.
delay: 200ms

method: "GET"
version: "1.1"
# Note that the path is not encoded.
url: /path/foo
headers:
fields:
- [ Host, test.com ]
- [ Connection, keep-alive ]
- [ uuid, unencoded_path_again ]

# The origin should not receive this request, but if it does, reply with a
# non-200 response so we can detect it.
<<: *404_ok_response

proxy-response:
# Verify that ATS returns out of cache for the uneencoded path object.
status: 200
8 changes: 6 additions & 2 deletions tests/gold_tests/url/uri.test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,16 @@
server = Test.MakeVerifierServerProcess("server", replay_file)
ts.Disk.records_config.update({
'proxy.config.diags.debug.enabled': 1,
'proxy.config.diags.debug.tags': 'http',
'proxy.config.diags.debug.tags': 'http|cache|url',
})
ts.Disk.remap_config.AddLine(
'map / http://127.0.0.1:{0}'.format(server.Variables.http_port)
)
tr = Test.AddTestRun("Verify correct URI parsing behavior.")
tr.Processes.Default.StartBefore(server)
tr.Processes.Default.StartBefore(ts)
tr.AddVerifierClientProcess("client", replay_file, http_ports=[ts.Variables.port])
tr.AddVerifierClientProcess(
"client",
replay_file,
http_ports=[ts.Variables.port],
other_args='--thread-limit 1')