Skip to content

Commit 62a1b55

Browse files
committed
Resolve UrlParser review comments
See gh-32513
1 parent aa05a6a commit 62a1b55

File tree

1 file changed

+87
-33
lines changed

1 file changed

+87
-33
lines changed

Diff for: spring-web/src/main/java/org/springframework/web/util/UrlParser.java

+87-33
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ final class UrlParser {
9494

9595
private boolean insideBrackets;
9696

97+
private boolean stopMainLoop = false;
98+
9799

98100
private UrlParser(String input, @Nullable UrlRecord base, @Nullable Charset encoding, @Nullable Consumer<String> validationErrorHandler) {
99101
this.input = new StringBuilder(input);
@@ -141,16 +143,20 @@ private UrlRecord basicUrlParser(@Nullable UrlRecord url, @Nullable State stateO
141143
if (url == null) {
142144
// Set url to a new URL.
143145
url = new UrlRecord();
146+
sanitizeInput(true);
147+
}
148+
else {
149+
sanitizeInput(false);
144150
}
145-
sanitizeInput();
151+
146152
// Let state be state override if given, or scheme start state otherwise.
147153
this.state = stateOverride != null ? stateOverride : State.SCHEME_START;
148154
this.stateOverride = stateOverride;
149155

150156
// Keep running the following state machine by switching on state.
151157
// If after a run pointer points to the EOF code point, go to the next step.
152158
// Otherwise, increase pointer by 1 and continue with the state machine.
153-
while (this.pointer <= this.input.length()) {
159+
while (!this.stopMainLoop && this.pointer <= this.input.length()) {
154160
int c;
155161
if (this.pointer < this.input.length()) {
156162
c = this.input.charAt(this.pointer);
@@ -168,38 +174,46 @@ private UrlRecord basicUrlParser(@Nullable UrlRecord url, @Nullable State stateO
168174
return url;
169175
}
170176

171-
void sanitizeInput() {
177+
void sanitizeInput(boolean removeC0ControlOrSpace) {
172178
boolean strip = true;
173179
for (int i = 0; i < this.input.length(); i++) {
174-
char ch = this.input.charAt(i);
175-
if ((strip && (ch == ' ' || isC0Control(ch)))
176-
|| (ch == '\t' || isNewline(ch))) {
180+
char c = this.input.charAt(i);
181+
boolean isSpaceOrC0 = c == ' ' || isC0Control(c);
182+
boolean isTabOrNL = c == '\t' || isNewline(c);
183+
if ((strip && isSpaceOrC0) || isTabOrNL) {
177184
if (validate()) {
178185
// If input contains any leading (or trailing) C0 control or space, invalid-URL-unit validation error.
179186
// If input contains any ASCII tab or newline, invalid-URL-unit validation error.
180-
validationError("Code point \"" + ch + "\" is not a URL unit.");
187+
validationError("Code point \"" + c + "\" is not a URL unit.");
188+
}
189+
// Remove any leading C0 control or space from input.
190+
if (removeC0ControlOrSpace && isSpaceOrC0) {
191+
this.input.deleteCharAt(i);
192+
}
193+
else if (isTabOrNL) {
194+
// Remove all ASCII tab or newline from input.
195+
this.input.deleteCharAt(i);
181196
}
182-
// Remove any leading (and trailing) C0 control or space from input.
183-
// Remove all ASCII tab or newline from input.
184-
this.input.deleteCharAt(i);
185197
i--;
186198
}
187199
else {
188200
strip = false;
189201
}
190202
}
191-
for (int i = this.input.length() - 1; i >= 0; i--) {
192-
char ch = this.input.charAt(i);
193-
if (ch == ' ' || isC0Control(ch)) {
194-
if (validate()) {
195-
// If input contains any (leading or) trailing C0 control or space, invalid-URL-unit validation error.
196-
validationError("Code point \"" + ch + "\" is not a URL unit.");
203+
if (removeC0ControlOrSpace) {
204+
for (int i = this.input.length() - 1; i >= 0; i--) {
205+
char c = this.input.charAt(i);
206+
if (c == ' ' || isC0Control(c)) {
207+
if (validate()) {
208+
// If input contains any (leading or) trailing C0 control or space, invalid-URL-unit validation error.
209+
validationError("Code point \"" + c + "\" is not a URL unit.");
210+
}
211+
// Remove any trailing C0 control or space from input.
212+
this.input.deleteCharAt(i);
213+
}
214+
else {
215+
break;
197216
}
198-
// Remove any (leading and) trailing C0 control or space from input.
199-
this.input.deleteCharAt(i);
200-
}
201-
else {
202-
break;
203217
}
204218
}
205219
}
@@ -377,7 +391,7 @@ private void emptyBuffer() {
377391
}
378392

379393
private int remaining(int deltaPos) {
380-
int pos = this.pointer + deltaPos;
394+
int pos = this.pointer + deltaPos + 1;
381395
if (pos < this.input.length()) {
382396
return this.input.charAt(pos);
383397
}
@@ -460,10 +474,43 @@ else if (len == 7) {
460474
}
461475

462476

463-
private static boolean isWindowsDriveLetter(CharSequence s, boolean normalized) {
464-
if (s.length() != 2) {
477+
/**
478+
* A Windows drive letter is two code points, of which the first is an ASCII alpha and the second is either U+003A (:) or U+007C (|).
479+
*
480+
* A normalized Windows drive letter is a Windows drive letter of which the second code point is U+003A (:).
481+
*/
482+
private static boolean isWindowsDriveLetter(CharSequence input, boolean normalized) {
483+
if (input.length() != 2) {
465484
return false;
466485
}
486+
return isWindowsDriveLetterInternal(input, normalized);
487+
}
488+
489+
/**
490+
* A string starts with a Windows drive letter if all of the following are true:
491+
*
492+
* its length is greater than or equal to 2
493+
* its first two code points are a Windows drive letter
494+
* its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
495+
*/
496+
private static boolean startsWithWindowsDriveLetter(CharSequence input) {
497+
int len = input.length();
498+
if (len < 2) {
499+
return false;
500+
}
501+
if (!isWindowsDriveLetterInternal(input, false)) {
502+
return false;
503+
}
504+
if (len == 2) {
505+
return true;
506+
}
507+
else {
508+
char ch2 = input.charAt(2);
509+
return ch2 == '/' || ch2 == '\\' || ch2 == '?' || ch2 == '#';
510+
}
511+
}
512+
513+
private static boolean isWindowsDriveLetterInternal(CharSequence s, boolean normalized) {
467514
char ch0 = s.charAt(0);
468515
if (!isAsciiAlpha(ch0)) {
469516
return false;
@@ -553,6 +600,7 @@ else if (c == ':') {
553600
intPort.value() == defaultPort(url.scheme)) {
554601
url.port = null;
555602
// Return.
603+
p.stopMainLoop = true;
556604
return;
557605
}
558606
}
@@ -561,7 +609,7 @@ else if (c == ':') {
561609
// If url’s scheme is "file", then:
562610
if (url.scheme.equals("file")) {
563611
// If remaining does not start with "//", special-scheme-missing-following-solidus validation error.
564-
if (p.validate() && p.remaining(0) != '/' && p.remaining(1) != '/') {
612+
if (p.validate() && (p.remaining(0) != '/' || p.remaining(1) != '/')) {
565613
p.validationError("\"file\" scheme not followed by \"//\".");
566614
}
567615
// Set state to file state.
@@ -636,7 +684,7 @@ else if (!"file".equals(p.base.scheme())) {
636684
@Override
637685
public void handle(int c, UrlRecord url, UrlParser p) {
638686
// If c is U+002F (/) and remaining starts with U+002F (/), then set state to special authority ignore slashes state and increase pointer by 1.
639-
if (c == '/' && p.remaining(1) == '/') {
687+
if (c == '/' && p.remaining(0) == '/') {
640688
p.setState(SPECIAL_AUTHORITY_IGNORE_SLASHES);
641689
p.pointer++;
642690
}
@@ -865,6 +913,7 @@ else if (c == ':' && !p.insideBrackets) {
865913
}
866914
// If state override is given and state override is hostname state, then return.
867915
if (p.stateOverride == HOST) {
916+
p.stopMainLoop = true;
868917
return;
869918
}
870919
// Let host be the result of host parsing buffer with url is not special.
@@ -888,6 +937,7 @@ else if ( (c == EOF || c == '/' || c == '?' || c == '#') ||
888937
// Otherwise, if state override is given, buffer is the empty string, and either url includes credentials or url’s port is non-null, return.
889938
else if (p.stateOverride != null && p.buffer.isEmpty() &&
890939
(url.includesCredentials() || url.port() != null )) {
940+
p.stopMainLoop = true;
891941
return;
892942
}
893943
// EXTRA: if buffer is not empty
@@ -904,6 +954,7 @@ else if (p.stateOverride != null && p.buffer.isEmpty() &&
904954
p.setState(PATH_START);
905955
// If state override is given, then return.
906956
if (p.stateOverride != null) {
957+
p.stopMainLoop = true;
907958
return;
908959
}
909960
}
@@ -974,6 +1025,7 @@ else if (c == EOF || c == '/' || c == '?' || c == '#' ||
9741025
}
9751026
// If state override is given, then return.
9761027
if (p.stateOverride != null) {
1028+
p.stopMainLoop = true;
9771029
return;
9781030
}
9791031
// Set state to path start state and decrease pointer by 1.
@@ -1023,8 +1075,8 @@ else if (c != EOF) {
10231075
// Set url’s query to null.
10241076
url.query = null;
10251077
// If the code point substring from pointer to the end of input does not start with a Windows drive letter, then shorten url’s path.
1026-
String substring = p.input.substring(p.pointer, Math.min(p.pointer + 2, p.input.length()));
1027-
if (!isWindowsDriveLetter(substring, false)) {
1078+
String substring = p.input.substring(p.pointer);
1079+
if (!startsWithWindowsDriveLetter(substring)) {
10281080
url.shortenPath();
10291081
}
10301082
// Otherwise:
@@ -1068,11 +1120,11 @@ public void handle(int c, UrlRecord url, UrlParser p) {
10681120
// Set url’s host to base’s host.
10691121
url.host = p.base.host;
10701122
// If the code point substring from pointer to the end of input does not start with a Windows drive letter and base’s path[0] is a normalized Windows drive letter, then append base’s path[0] to url’s path.
1071-
String substring = p.input.substring(p.pointer, Math.min(p.pointer + 2, p.input.length()));
1072-
if (!isWindowsDriveLetter(substring, false) &&
1123+
String substring = p.input.substring(p.pointer);
1124+
if (!startsWithWindowsDriveLetter(substring) &&
10731125
p.base.path instanceof PathSegments basePath &&
10741126
!basePath.isEmpty() &&
1075-
isWindowsDriveLetter(basePath.get(0), false)) {
1127+
isWindowsDriveLetter(basePath.get(0), true)) {
10761128
url.path.append(basePath.get(0));
10771129
}
10781130
}
@@ -1099,6 +1151,7 @@ else if (p.buffer.isEmpty()) {
10991151
url.host = EmptyHost.INSTANCE;
11001152
// If state override is given, then return.
11011153
if (p.stateOverride != null) {
1154+
p.stopMainLoop = true;
11021155
return;
11031156
}
11041157
// Set state to path start state.
@@ -1116,6 +1169,7 @@ else if (p.buffer.isEmpty()) {
11161169
url.host = host;
11171170
// If state override is given, then return.
11181171
if (p.stateOverride != null) {
1172+
p.stopMainLoop = true;
11191173
return;
11201174
}
11211175
// Set buffer to the empty string and state to path start state.
@@ -1186,7 +1240,7 @@ public void handle(int c, UrlRecord url, UrlParser p) {
11861240
// - state override is not given and c is U+003F (?) or U+0023 (#)
11871241
// then:
11881242
if (c == EOF || c == '/' ||
1189-
url.isSpecial() && c == '\\' ||
1243+
(url.isSpecial() && c == '\\') ||
11901244
(p.stateOverride == null && (c == '?' || c == '#'))) {
11911245
// If url is special and c is U+005C (\), invalid-reverse-solidus validation error.
11921246
if (p.validate() && url.isSpecial() && c == '\\') {
@@ -1238,7 +1292,7 @@ else if (p.previousState != URL_TEMPLATE && c == '{') {
12381292
p.append(c);
12391293
p.setState(URL_TEMPLATE);
12401294
}
1241-
// Otherwise, basicUrlParser these steps:
1295+
// Otherwise, run these steps:
12421296
else {
12431297
if (p.validate()) {
12441298
// If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.

0 commit comments

Comments
 (0)