Skip to content

Commit

Permalink
Issue #18 URI path processing (#424)
Browse files Browse the repository at this point in the history
Issue #18 URI path processing

Co-authored-by: Mark Thomas <markt@apache.org>
  • Loading branch information
gregw and markt-asf authored Oct 19, 2021
1 parent 72d509f commit 100fd4e
Show file tree
Hide file tree
Showing 3 changed files with 509 additions and 12 deletions.
34 changes: 25 additions & 9 deletions api/src/main/java/jakarta/servlet/http/HttpServletRequest.java
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,14 @@ public String toString() {
* <p>
* This method returns <code>null</code> if there was no extra path information.
*
* @return a <code>String</code>, decoded by the web container, specifying extra path information that comes after the
* servlet path but before the query string in the request URL; or <code>null</code> if the URL does not have any extra
* path information
* @return a <code>String</code> specifying extra path information that comes after the servlet path but before the
* query string in the request URL; or <code>null</code> if the URL does not have any extra path information. The path
* will be canonicalized as per section 3.5 of the specification. This method will not return any encoded characters
* unless the container is configured specifically to allow them.
* @throws IllegalArgumentException In standard configuration, this method will never throw. However, a container may be
* configured to not reject some suspicious sequences identified by section 3.5.2 of the specification. Furthermore, the
* container may be configured to allow such paths to only be accessed via safer methods like {@link #getRequestURI()}
* and to throw IllegalArgumentException if this method is called for such suspicious paths.
*/
public String getPathInfo();

Expand Down Expand Up @@ -299,8 +304,13 @@ default public PushBuilder newPushBuilder() {
* {@link jakarta.servlet.ServletContext#getContextPath()} should be considered as the prime or preferred context path
* of the application.
*
* @return a <code>String</code> specifying the portion of the request URI that indicates the context of the request
*
* @return a <code>String</code> specifying the portion of the request URI that indicates the context of the request.
* The path will be canonicalized as per section 3.5 of the specification. This method will not return any encoded
* characters unless the container is configured specifically to allow them.
* @throws IllegalArgumentException In standard configuration, this method will never throw. However, a container may be
* configured to not reject some suspicious sequences identified by section 3.5.2 of the specification. Furthermore, the
* container may be configured to allow such paths to only be accessed via safer methods like {@link #getRequestURI()}
* and to throw IllegalArgumentException if this method is called for such suspicious paths.
* @see jakarta.servlet.ServletContext#getContextPath()
*/
public String getContextPath();
Expand Down Expand Up @@ -411,15 +421,21 @@ default public PushBuilder newPushBuilder() {
public StringBuffer getRequestURL();

/**
* Returns the part of this request's URL that calls the servlet. This path starts with a "/" character and includes
* either the servlet name or a path to the servlet, but does not include any extra path information or a query string.
* Returns the part of this request's URL that calls the servlet. This path starts with a "/" character and includes the
* path to the servlet, but does not include any extra path information or a query string.
*
* <p>
* This method will return an empty string ("") if the servlet used to process this request was matched using the "/*"
* pattern.
*
* @return a <code>String</code> containing the name or path of the servlet being called, as specified in the request
* URL, decoded, or an empty string if the servlet used to process the request is matched using the "/*" pattern.
* @return a <code>String</code> containing the path of the servlet being called, as specified in the request URL, or an
* empty string if the servlet used to process the request is matched using the "/*" pattern. The path will be
* canonicalized as per section 3.5 of the specification. This method will not return any encoded characters unless the
* container is configured specifically to allow them.
* @throws IllegalArgumentException In standard configuration, this method will never throw. However, a container may be
* configured to not reject some suspicious sequences identified by section 3.5.2 of the specification. Furthermore, the
* container may be configured to allow such paths to only be accessed via safer methods like {@link #getRequestURI()}
* and to throw IllegalArgumentException if this method is called for such suspicious paths.
*/
public String getServletPath();

Expand Down
313 changes: 313 additions & 0 deletions api/src/test/java/jakarta/servlet/http/CanonicalUriPathTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@
package jakarta.servlet.http;

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class CanonicalUriPathTest {

/**
 * The spellings of a "." or ".." segment in which at least one dot is percent-encoded. Lookups ignore case, so
 * "%2E" matches as well as "%2e". The set is unmodifiable.
 */
private static final Set<String> ENCODED_DOT_SEGMENT;
static {
    // A case-insensitive TreeMap backs the set so that membership tests ignore the case of the hex digits.
    Set<String> encodedDots = Collections.newSetFromMap(new TreeMap<>(String.CASE_INSENSITIVE_ORDER));
    Collections.addAll(encodedDots, "%2e", "%2e%2e", "%2e.", ".%2e");
    ENCODED_DOT_SEGMENT = Collections.unmodifiableSet(encodedDots);
}

/**
 * Canonicalizes a URI path and reports every reason for which the request should be rejected.
 * <p>
 * The code presented here is a non-normative implementation of the algorithm from section 3.5 of the specification:
 * discard the fragment, separate the query, split into segments, remove path parameters, decode, remove empty
 * segments, remove dot-segments, concatenate, then report errors and suspicious sequences.
 *
 * @param uriPath the raw URI path, optionally followed by a query and/or a fragment
 * @param rejection callback invoked once per rejection reason, in a fixed order; not invoked at all for an acceptable
 * path
 * @return the canonical path; it always starts with "/"
 * @throws IllegalArgumentException if <code>uriPath</code> is <code>null</code>
 */
public static String canonicalUriPath(String uriPath, Consumer<String> rejection) {

    if (uriPath == null)
        throw new IllegalArgumentException("null path");

    String path = uriPath;

    // Conditions observed while processing and only reported at the end
    boolean fragment = false;
    boolean emptySegmentBeforeDotDot = false;
    boolean decodeError = false;

    // Discard fragment.
    int fragmentStart = path.indexOf('#');
    if (fragmentStart >= 0) {
        path = path.substring(0, fragmentStart);
        fragment = true;
    }

    // Separation of path and query.
    int queryStart = path.indexOf('?');
    if (queryStart >= 0)
        path = path.substring(0, queryStart);

    // This needs to be checked after removal of fragment and query
    boolean startsWithSlash = path.startsWith("/");

    // An encoded "/" only matters inside the path (path parameters included); a "%2F" in the query or fragment
    // must neither reject the path nor trigger re-encoding. Both cases are tested explicitly so the check does
    // not depend on the default locale.
    boolean encodedSlash = path.contains("%2f") || path.contains("%2F");

    // Split path into segments.
    List<String> segments = new ArrayList<>(Arrays.asList(path.substring(startsWithSlash ? 1 : 0).split("/", -1)));

    // Remove path parameters.
    boolean emptyNonLastSegmentWithParam = segments.stream().limit(segments.size() - 1).anyMatch(s -> s.startsWith(";"));
    boolean dotSegmentWithParam = segments.stream().anyMatch(s -> s.startsWith(".;") || s.startsWith("..;"));
    segments.replaceAll(s -> (s.contains(";")) ? s.substring(0, s.indexOf(';')) : s);

    // Decode characters
    boolean encodedDotSegment = segments.stream().anyMatch(ENCODED_DOT_SEGMENT::contains);
    try {
        segments.replaceAll(CanonicalUriPathTest::decode);
    } catch (IllegalArgumentException | IndexOutOfBoundsException e) {
        // Malformed percent-encoding or invalid UTF-8. Segments decoded before the failure stay decoded, the
        // rest stay as they were; the failure itself is reported below as a rejection.
        decodeError = true;
    }

    // Remove Empty Segments other than the last
    AtomicInteger last = new AtomicInteger(segments.size());
    segments.removeIf(s -> last.decrementAndGet() != 0 && s.length() == 0);

    // Remove dot-segments
    int count = 0; // number of ordinary segments currently available for a ".." to consume
    for (ListIterator<String> s = segments.listIterator(); s.hasNext();) {
        String segment = s.next();
        if (segment.equals(".")) {
            s.remove();
        } else if (segment.equals("..")) {
            if (count > 0) {
                // Drop the ".." itself and the ordinary segment in front of it.
                s.remove();
                String prev = s.previous();
                s.remove();
                count--;
                // NOTE(review): empty non-last segments were already removed above, so prev is not expected to
                // be empty here and this flag stays false; kept to mirror the rule listed below.
                emptySegmentBeforeDotDot |= prev.length() == 0;
            }
            // With nothing to consume the ".." is kept and reported as a leading dot-dot-segment.
        } else {
            count++;
        }
    }

    // Concatenate segments
    if (segments.size() == 0)
        path = "/";
    else {
        StringBuilder buf = new StringBuilder();
        if (!decodeError && encodedSlash) {
            // Keep "%2F" distinguishable from a real separator in the (rejected) result.
            segments.replaceAll(CanonicalUriPathTest::encode);
        }
        segments.forEach(s -> buf.append("/").append(s));
        path = buf.toString();
    }

    // Rejecting Errors and Suspicious Sequences
    if (fragment)
        rejection.accept("fragment");
    if (decodeError)
        rejection.accept("decode error");
    // Any path not starting with the `"/"` character
    if (!startsWithSlash)
        rejection.accept("must start with /");
    // Any path starting with an initial segment of `".."`
    if (!segments.isEmpty() && segments.get(0).equals(".."))
        rejection.accept("leading dot-dot-segment");
    // The encoded `"/"` character
    if (encodedSlash)
        rejection.accept("encoded /");
    // Any `"."` or `".."` segment that had a path parameter
    if (dotSegmentWithParam)
        rejection.accept("dot segment with parameter");
    // Any `"."` or `".."` segment with any encoded characters
    if (encodedDotSegment)
        rejection.accept("encoded dot segment");
    // Any `".."` segment preceded by an empty segment
    if (emptySegmentBeforeDotDot)
        rejection.accept("empty segment before dot dot");
    // Any empty segment with parameters
    if (emptyNonLastSegmentWithParam)
        rejection.accept("empty segment with parameters");
    // The `"\"` character encoded or not.
    if (path.contains("\\"))
        rejection.accept("backslash character");
    // Any control characters either encoded or not.
    for (char c : path.toCharArray()) {
        if (c < 0x20 || c == 0x7f) {
            rejection.accept("control character");
            break;
        }
    }

    return path;
}

/**
 * Decodes the percent-encoded octets of a single path segment, interpreting each run of octets as UTF-8.
 * <p>
 * Every '%' must be followed by exactly two ASCII hexadecimal digits. Signs (for example "%+1" or "%-1"), non-ASCII
 * digits and truncated escapes are rejected rather than being interpreted leniently.
 *
 * @param segment the segment to decode
 * @return the decoded segment, or <code>segment</code> itself if it contains no '%'
 * @throws IllegalArgumentException if a '%' is not followed by two hexadecimal digits, or if the decoded octets are
 * not valid UTF-8
 */
private static String decode(String segment) {
    if (!segment.contains("%"))
        return segment;

    StringBuilder buf = new StringBuilder();
    // Consecutive escapes are collected and decoded together because one character may span several octets.
    ByteArrayOutputStream utf8 = new ByteArrayOutputStream();
    for (int i = 0; i < segment.length(); i++) {
        char c = segment.charAt(i);
        if (c == '%') {
            if (i + 2 >= segment.length())
                throw new IllegalArgumentException("truncated percent-encoding in: " + segment);
            int high = hexDigit(segment.charAt(i + 1));
            int low = hexDigit(segment.charAt(i + 2));
            if (high < 0 || low < 0)
                throw new IllegalArgumentException("invalid percent-encoding in: " + segment);
            utf8.write((high << 4) | low);
            i += 2;
        } else {
            if (utf8.size() > 0) {
                buf.append(fromUtf8(utf8.toByteArray()));
                utf8.reset();
            }
            buf.append(c);
        }
    }
    // Flush octets from escapes at the very end of the segment.
    if (utf8.size() > 0)
        buf.append(fromUtf8(utf8.toByteArray()));
    return buf.toString();
}

/** Returns the value of an ASCII hexadecimal digit, or -1 if <code>c</code> is not one. */
private static int hexDigit(char c) {
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/**
 * Percent-encodes the '%' and '/' characters of a segment, leaving every other character untouched.
 *
 * @param segment the decoded segment
 * @return the segment with "%" written as "%25" and "/" written as "%2F"
 */
private static String encode(String segment) {
    // '%' goes first so that the '%' of a freshly produced "%2F" is not escaped a second time.
    String escapedPercent = segment.replace("%", "%25");
    return escapedPercent.replace("/", "%2F");
}

/**
 * Decodes a sequence of octets as UTF-8, reporting malformed input instead of substituting a replacement character.
 *
 * @param bytes the octets to decode
 * @return the decoded characters
 * @throws IllegalArgumentException if the octets cannot be decoded; the cause is the underlying
 * {@link CharacterCodingException}
 */
private static CharBuffer fromUtf8(byte[] bytes) {
    final ByteBuffer octets = ByteBuffer.wrap(bytes);
    try {
        return StandardCharsets.UTF_8.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .decode(octets);
    } catch (CharacterCodingException malformed) {
        throw new IllegalArgumentException(malformed);
    }
}

/**
 * Supplies the cases for {@link #testCanonicalUriPath(String, String, boolean)}.
 *
 * @return one {@link Arguments} per case, holding the raw URI path, the expected canonical path and whether at least
 * one rejection is expected
 */
public static Stream<Arguments> data() {
    return Stream.of(
            // Plain paths and path parameters
            Arguments.of("foo/bar", "/foo/bar", true),
            Arguments.of("/foo/bar", "/foo/bar", false),
            Arguments.of("/foo/bar;jsessionid=1234", "/foo/bar", false),
            Arguments.of("/foo/bar/", "/foo/bar/", false),
            Arguments.of("/foo/bar/;jsessionid=1234", "/foo/bar/", false),
            Arguments.of("/foo;/bar;", "/foo/bar", false),
            Arguments.of("/foo;/bar;/;", "/foo/bar/", false),
            // Control characters, encoded slash / percent, backslash
            Arguments.of("/foo%00/bar/", "/foo\000/bar/", true),
            Arguments.of("/foo%7Fbar", "/foo\177bar", true),
            Arguments.of("/foo%2Fbar", "/foo%2Fbar", true),
            Arguments.of("/foo%2Fb%25r", "/foo%2Fb%25r", true),
            Arguments.of("/foo/b%25r", "/foo/b%r", false),
            Arguments.of("/foo\\bar", "/foo\\bar", true),
            Arguments.of("/foo%5Cbar", "/foo\\bar", true),
            Arguments.of("/foo;%2F/bar", "/foo/bar", true),
            // Single-dot segments
            Arguments.of("/foo/./bar", "/foo/bar", false),
            Arguments.of("/foo/././bar", "/foo/bar", false),
            Arguments.of("/./foo/bar", "/foo/bar", false),
            Arguments.of("/foo/%2e/bar", "/foo/bar", true),
            Arguments.of("/foo/.;/bar", "/foo/bar", true),
            Arguments.of("/foo/%2e;/bar", "/foo/bar", true),
            Arguments.of("/foo/.%2Fbar", "/foo/.%2Fbar", true),
            Arguments.of("/foo/.%5Cbar", "/foo/.\\bar", true),
            Arguments.of("/foo/bar/.", "/foo/bar", false),
            Arguments.of("/foo/bar/./", "/foo/bar/", false),
            Arguments.of("/foo/bar/.;", "/foo/bar", true),
            Arguments.of("/foo/bar/./;", "/foo/bar/", false),
            Arguments.of("/foo/.bar", "/foo/.bar", false),
            // Double-dot segments
            Arguments.of("/foo/../bar", "/bar", false),
            Arguments.of("/foo/../../bar", "/../bar", true),
            Arguments.of("/../foo/bar", "/../foo/bar", true),
            Arguments.of("/foo/%2e%2E/bar", "/bar", true),
            Arguments.of("/foo/%2e%2e/%2E%2E/bar", "/../bar", true),
            Arguments.of("/foo/./../bar", "/bar", false),
            Arguments.of("/foo/..;/bar", "/bar", true),
            Arguments.of("/foo/%2e%2E;/bar", "/bar", true),
            Arguments.of("/foo/..%2Fbar", "/foo/..%2Fbar", true),
            Arguments.of("/foo/..%5Cbar", "/foo/..\\bar", true),
            Arguments.of("/foo/bar/..", "/foo", false),
            Arguments.of("/foo/bar/../", "/foo/", false),
            Arguments.of("/foo/bar/..;", "/foo", true),
            Arguments.of("/foo/bar/../;", "/foo/", false),
            Arguments.of("/foo/..bar", "/foo/..bar", false),
            Arguments.of("/foo/.../bar", "/foo/.../bar", false),
            // Empty segments
            Arguments.of("/foo//bar", "/foo/bar", false),
            Arguments.of("//foo//bar//", "/foo/bar/", false),
            Arguments.of("/;/foo;/;/bar/;/;", "/foo/bar/", true),
            Arguments.of("/foo//../bar", "/bar", false),
            Arguments.of("/foo/;/../bar", "/bar", true),
            // Percent-decoding, valid and invalid
            Arguments.of("/foo%E2%82%ACbar", "/foo€bar", false),
            Arguments.of("/foo%20bar", "/foo bar", false),
            Arguments.of("/foo%E2%82", "/foo%E2%82", true),
            Arguments.of("/foo%E2%82bar", "/foo%E2%82bar", true),
            Arguments.of("/foo%-1/bar", "/foo%-1/bar", true),
            Arguments.of("/foo%XX/bar", "/foo%XX/bar", true),
            Arguments.of("/foo%/bar", "/foo%/bar", true),
            Arguments.of("/foo/bar%0", "/foo/bar%0", true),
            Arguments.of("/good%20/bad%/%20mix%", "/good /bad%/%20mix%", true),
            // Queries and fragments
            Arguments.of("/foo/bar?q", "/foo/bar", false),
            Arguments.of("/foo/bar#f", "/foo/bar", true),
            Arguments.of("/foo/bar?q#f", "/foo/bar", true),
            Arguments.of("/foo/bar/?q", "/foo/bar/", false),
            Arguments.of("/foo/bar/#f", "/foo/bar/", true),
            Arguments.of("/foo/bar/?q#f", "/foo/bar/", true),
            Arguments.of("/foo/bar;?q", "/foo/bar", false),
            Arguments.of("/foo/bar;#f", "/foo/bar", true),
            Arguments.of("/foo/bar;?q#f", "/foo/bar", true),
            // Root and near-root paths
            Arguments.of("/", "/", false),
            Arguments.of("//", "/", false),
            Arguments.of("/;/", "/", true),
            Arguments.of("/.", "/", false),
            Arguments.of("/..", "/..", true),
            Arguments.of("/./", "/", false),
            Arguments.of("/../", "/../", true),
            // Paths that do not start with "/"
            Arguments.of("foo/bar/", "/foo/bar/", true),
            Arguments.of("./foo/bar/", "/foo/bar/", true),
            Arguments.of("%2e/foo/bar/", "/foo/bar/", true),
            Arguments.of("../foo/bar/", "/../foo/bar/", true),
            Arguments.of(".%2e/foo/bar/", "/../foo/bar/", true),
            Arguments.of(";/foo/bar/", "/foo/bar/", true),
            Arguments.of("/#f", "/", true),
            Arguments.of("#f", "/", true),
            Arguments.of("/?q", "/", false),
            Arguments.of("?q", "/", true));
}

/**
 * Checks one case from {@link #data()}: the canonical form of the path and whether any rejection was reported.
 * Also writes one table row per case to standard error.
 *
 * @param path the raw URI path to canonicalize
 * @param expected the expected canonical path
 * @param rejected whether at least one rejection reason is expected
 */
@ParameterizedTest
@MethodSource("data")
public void testCanonicalUriPath(String path, String expected, boolean rejected) {
    List<String> reasons = new ArrayList<>();
    String canonical = canonicalUriPath(path, reasons::add);

    Assertions.assertEquals(expected, canonical);
    Assertions.assertEquals(rejected, !reasons.isEmpty());

    // print for inclusion in adoc
    String outcome = reasons.isEmpty() ? "" : "400 " + String.join(" & ", reasons);
    System.err.println(String.format("| `%s` | `%s` | ", path, canonical) + outcome);
}
}
Loading

0 comments on commit 100fd4e

Please sign in to comment.