Skip to content

Commit

Permalink
fix #1076: Dialogue allows colons in http paths and query parameters
Browse files Browse the repository at this point in the history
This is required by some GCP APIs, where encoded colons are not
respected.
[rfc3986 section 3.3](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
clearly allows colon as a pchar, however not all servers are necessarily fully
compliant with the rfc. We've had issues with specific sub-delimiters
in the past. This shouldn't be terribly risky, esepcially given this
implementaiton has never allowed unencoded colons before, and we have
no evidence to suggest they should be problematic.
  • Loading branch information
carterkozak committed Sep 23, 2024
1 parent 8386581 commit 0340bcb
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -239,16 +239,37 @@ public String toString() {
static class UrlEncoder {
private static final CharMatcher DIGIT = CharMatcher.inRange('0', '9');
private static final CharMatcher ALPHA = CharMatcher.inRange('a', 'z').or(CharMatcher.inRange('A', 'Z'));

// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
private static final CharMatcher UNRESERVED = DIGIT.or(ALPHA).or(CharMatcher.anyOf("-._~"));

// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
private static final CharMatcher SUB_DELIMS = CharMatcher.anyOf("!$&'()*+,;=");
private static final CharMatcher IS_HOST = UNRESERVED.or(SUB_DELIMS);
private static final CharMatcher IS_P_CHAR = UNRESERVED;
private static final CharMatcher IS_PATH = UNRESERVED.or(SUB_DELIMS).or(CharMatcher.anyOf("/"));

// The RFC permits percent-encoding any character. We also percent encode sub-delimiters to avoid
// incompatibilities with http specification beyond the general URI definition per
// https://tools.ietf.org/html/rfc3986#section-3.3
// > URI producing applications often use the reserved characters allowed in a segment to
// > delimit scheme-specific or dereference-handler-specific subcomponents.

// If memory serves, we've encountered issues with '+' being interpreted by some servers as an
// encoded space, and ';' in paths being interpreted as the beginning of http matrix parameters.

// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
// A strict implementation of the rfc allow 'UNRESERVED.or(CharMatcher.anyOf(":@"))',
// however we have updated it from 'UNRESERVED' to allow colons, and would like to
// make changes as slowly as possible.
private static final CharMatcher IS_P_CHAR = UNRESERVED.or(CharMatcher.is(':'));

// For historical reasons, we allow sub-delims in the base uri if provided directly,
// but escape them in paths we create/encode. Base-uri paths tend to be simple, so I wouldn't
// expect the sub-delims to be relevant here in the vast majority of cases. With sufficient
// research and testing, we should incorporate relevant sub-delims into IS_P_CHAR and update this
// to: 'IS_P_CHAR.or(CharMatcher.is('/'))'.
private static final CharMatcher IS_PATH = IS_P_CHAR.or(SUB_DELIMS).or(CharMatcher.is('/'));

// query = *( pchar / "/" / "?" )
private static final CharMatcher IS_QUERY_CHAR = IS_P_CHAR.or(CharMatcher.anyOf("/?"));

static boolean isHost(String maybeHost) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,13 @@ public void encodesPaths() throws Exception {
.build()
.toString())
.isEqualTo("http://host:80/%21%40%23%24%25%5E%26%2A%28%29_%2B%7B%7D"
+ "%5B%5D%7C%5C%7C%22%27%3A%3B%2F%3F.%3E%2C%3C~%60");
+ "%5B%5D%7C%5C%7C%22%27:%3B%2F%3F.%3E%2C%3C~%60");
assertThat(minimalUrl()
.pathSegments(List.of("!@#$%^&*()_+{}", "[]|\\|\"':;/?.>,<~`"))
.build()
.toString())
.isEqualTo("http://host:80/%21%40%23%24%25%5E%26%2A%28%29_%2B%7B%7D"
+ "/%5B%5D%7C%5C%7C%22%27%3A%3B%2F%3F.%3E%2C%3C~%60");
+ "/%5B%5D%7C%5C%7C%22%27:%3B%2F%3F.%3E%2C%3C~%60");
}

@Test
Expand Down Expand Up @@ -190,7 +190,24 @@ public void urlEncoder_encodeQuery_onlyEncodesNonReservedChars() {

@Test
public void urlEncoder_encodeQuery_encodesPlusSign() {
assertThat(BaseUrl.UrlEncoder.encodeQueryNameOrValue("+")).isEqualTo("%2B");
assertThat(BaseUrl.UrlEncoder.encodeQueryNameOrValue("+"))
.as("'+' must be encoded, otherwise many servers will interpret it as a url encoded space")
.isEqualTo("%2B");
}

@Test
public void urlEncoder_encodePath_encodesSemicolon() {
assertThat(BaseUrl.UrlEncoder.encodePathSegment(";"))
.as("';' must be encoded, otherwise many servers will "
+ "interpret as a delimiter for matrix parameters")
.isEqualTo("%3B");
}

@Test
public void urlEncoder_encodePath_allowsColon() {
assertThat(BaseUrl.UrlEncoder.encodePathSegment(":"))
.as("Several GCP APIs use ':' within paths, and do not handle encoded colons")
.isEqualTo(":");
}

@Test
Expand Down

0 comments on commit 0340bcb

Please sign in to comment.