From 0340bcb6775a4c209794532890f3f91f06f3921f Mon Sep 17 00:00:00 2001 From: Carter Kozak Date: Mon, 23 Sep 2024 10:53:01 -0400 Subject: [PATCH] fix #1076: Dialogue allows colons in http paths and query parameters This is required by some GCP APIs, where encoded colons are not respected. [rfc3986 section 3.3](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3) clearly allows colon as a pchar, however not all servers are necessarily fully compliant with the rfc. We've had issues with specific sub-delimiters in the past. This shouldn't be terribly risky, esepcially given this implementaiton has never allowed unencoded colons before, and we have no evidence to suggest they should be problematic. --- .../com/palantir/dialogue/core/BaseUrl.java | 25 +++++++++++++++++-- .../dialogue/core/UrlBuilderTest.java | 23 ++++++++++++++--- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/dialogue-core/src/main/java/com/palantir/dialogue/core/BaseUrl.java b/dialogue-core/src/main/java/com/palantir/dialogue/core/BaseUrl.java index 91e4fd0d6..bb43aa59a 100644 --- a/dialogue-core/src/main/java/com/palantir/dialogue/core/BaseUrl.java +++ b/dialogue-core/src/main/java/com/palantir/dialogue/core/BaseUrl.java @@ -239,16 +239,37 @@ public String toString() { static class UrlEncoder { private static final CharMatcher DIGIT = CharMatcher.inRange('0', '9'); private static final CharMatcher ALPHA = CharMatcher.inRange('a', 'z').or(CharMatcher.inRange('A', 'Z')); + + // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" private static final CharMatcher UNRESERVED = DIGIT.or(ALPHA).or(CharMatcher.anyOf("-._~")); + + // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" private static final CharMatcher SUB_DELIMS = CharMatcher.anyOf("!$&'()*+,;="); private static final CharMatcher IS_HOST = UNRESERVED.or(SUB_DELIMS); - private static final CharMatcher IS_P_CHAR = UNRESERVED; - private static final CharMatcher IS_PATH = UNRESERVED.or(SUB_DELIMS).or(CharMatcher.anyOf("/")); + // The RFC permits percent-encoding any character. We also percent encode sub-delimiters to avoid // incompatibilities with http specification beyond the general URI definition per // https://tools.ietf.org/html/rfc3986#section-3.3 // > URI producing applications often use the reserved characters allowed in a segment to // > delimit scheme-specific or dereference-handler-specific subcomponents. + + // If memory serves, we've encountered issues with '+' being interpreted by some servers as an + // encoded space, and ';' in paths being interpreted as the beginning of http matrix parameters. + + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + // A strict implementation of the rfc allow 'UNRESERVED.or(CharMatcher.anyOf(":@"))', + // however we have updated it from 'UNRESERVED' to allow colons, and would like to + // make changes as slowly as possible. + private static final CharMatcher IS_P_CHAR = UNRESERVED.or(CharMatcher.is(':')); + + // For historical reasons, we allow sub-delims in the base uri if provided directly, + // but escape them in paths we create/encode. Base-uri paths tend to be simple, so I wouldn't + // expect the sub-delims to be relevant here in the vast majority of cases. With sufficient + // research and testing, we should incorporate relevant sub-delims into IS_P_CHAR and update this + // to: 'IS_P_CHAR.or(CharMatcher.is('/'))'. + private static final CharMatcher IS_PATH = IS_P_CHAR.or(SUB_DELIMS).or(CharMatcher.is('/')); + + // query = *( pchar / "/" / "?" ) private static final CharMatcher IS_QUERY_CHAR = IS_P_CHAR.or(CharMatcher.anyOf("/?")); static boolean isHost(String maybeHost) { diff --git a/dialogue-core/src/test/java/com/palantir/dialogue/core/UrlBuilderTest.java b/dialogue-core/src/test/java/com/palantir/dialogue/core/UrlBuilderTest.java index af9a9dee0..b43b3014e 100644 --- a/dialogue-core/src/test/java/com/palantir/dialogue/core/UrlBuilderTest.java +++ b/dialogue-core/src/test/java/com/palantir/dialogue/core/UrlBuilderTest.java @@ -74,13 +74,13 @@ public void encodesPaths() throws Exception { .build() .toString()) .isEqualTo("http://host:80/%21%40%23%24%25%5E%26%2A%28%29_%2B%7B%7D" - + "%5B%5D%7C%5C%7C%22%27%3A%3B%2F%3F.%3E%2C%3C~%60"); + + "%5B%5D%7C%5C%7C%22%27:%3B%2F%3F.%3E%2C%3C~%60"); assertThat(minimalUrl() .pathSegments(List.of("!@#$%^&*()_+{}", "[]|\\|\"':;/?.>,<~`")) .build() .toString()) .isEqualTo("http://host:80/%21%40%23%24%25%5E%26%2A%28%29_%2B%7B%7D" - + "/%5B%5D%7C%5C%7C%22%27%3A%3B%2F%3F.%3E%2C%3C~%60"); + + "/%5B%5D%7C%5C%7C%22%27:%3B%2F%3F.%3E%2C%3C~%60"); } @Test @@ -190,7 +190,24 @@ public void urlEncoder_encodeQuery_onlyEncodesNonReservedChars() { @Test public void urlEncoder_encodeQuery_encodesPlusSign() { - assertThat(BaseUrl.UrlEncoder.encodeQueryNameOrValue("+")).isEqualTo("%2B"); + assertThat(BaseUrl.UrlEncoder.encodeQueryNameOrValue("+")) + .as("'+' must be encoded, otherwise many servers will interpret it as a url encoded space") + .isEqualTo("%2B"); + } + + @Test + public void urlEncoder_encodePath_encodesSemicolon() { + assertThat(BaseUrl.UrlEncoder.encodePathSegment(";")) + .as("';' must be encoded, otherwise many servers will " + + "interpret as a delimiter for matrix parameters") + .isEqualTo("%3B"); + } + + @Test + public void urlEncoder_encodePath_allowsColon() { + assertThat(BaseUrl.UrlEncoder.encodePathSegment(":")) + .as("Several GCP APIs use ':' within paths, and do not handle encoded colons") + .isEqualTo(":"); } @Test