@@ -94,6 +94,8 @@ final class UrlParser {
94
94
95
95
private boolean insideBrackets ;
96
96
97
+ private boolean stopMainLoop = false ;
98
+
97
99
98
100
private UrlParser (String input , @ Nullable UrlRecord base , @ Nullable Charset encoding , @ Nullable Consumer <String > validationErrorHandler ) {
99
101
this .input = new StringBuilder (input );
@@ -141,16 +143,20 @@ private UrlRecord basicUrlParser(@Nullable UrlRecord url, @Nullable State stateO
141
143
if (url == null ) {
142
144
// Set url to a new URL.
143
145
url = new UrlRecord ();
146
+ sanitizeInput (true );
147
+ }
148
+ else {
149
+ sanitizeInput (false );
144
150
}
145
- sanitizeInput ();
151
+
146
152
// Let state be state override if given, or scheme start state otherwise.
147
153
this .state = stateOverride != null ? stateOverride : State .SCHEME_START ;
148
154
this .stateOverride = stateOverride ;
149
155
150
156
// Keep running the following state machine by switching on state.
151
157
// If after a run pointer points to the EOF code point, go to the next step.
152
158
// Otherwise, increase pointer by 1 and continue with the state machine.
153
- while (this .pointer <= this .input .length ()) {
159
+ while (! this . stopMainLoop && this .pointer <= this .input .length ()) {
154
160
int c ;
155
161
if (this .pointer < this .input .length ()) {
156
162
c = this .input .charAt (this .pointer );
@@ -168,38 +174,46 @@ private UrlRecord basicUrlParser(@Nullable UrlRecord url, @Nullable State stateO
168
174
return url ;
169
175
}
170
176
171
- void sanitizeInput () {
177
+ void sanitizeInput (boolean removeC0ControlOrSpace ) {
172
178
boolean strip = true ;
173
179
for (int i = 0 ; i < this .input .length (); i ++) {
174
- char ch = this .input .charAt (i );
175
- if ((strip && (ch == ' ' || isC0Control (ch )))
176
- || (ch == '\t' || isNewline (ch ))) {
180
+ char c = this .input .charAt (i );
181
+ boolean isSpaceOrC0 = c == ' ' || isC0Control (c );
182
+ boolean isTabOrNL = c == '\t' || isNewline (c );
183
+ if ((strip && isSpaceOrC0 ) || isTabOrNL ) {
177
184
if (validate ()) {
178
185
// If input contains any leading (or trailing) C0 control or space, invalid-URL-unit validation error.
179
186
// If input contains any ASCII tab or newline, invalid-URL-unit validation error.
180
- validationError ("Code point \" " + ch + "\" is not a URL unit." );
187
+ validationError ("Code point \" " + c + "\" is not a URL unit." );
188
+ }
189
+ // Remove any leading C0 control or space from input.
190
+ if (removeC0ControlOrSpace && isSpaceOrC0 ) {
191
+ this .input .deleteCharAt (i );
192
+ }
193
+ else if (isTabOrNL ) {
194
+ // Remove all ASCII tab or newline from input.
195
+ this .input .deleteCharAt (i );
181
196
}
182
- // Remove any leading (and trailing) C0 control or space from input.
183
- // Remove all ASCII tab or newline from input.
184
- this .input .deleteCharAt (i );
185
197
i --;
186
198
}
187
199
else {
188
200
strip = false ;
189
201
}
190
202
}
191
- for (int i = this .input .length () - 1 ; i >= 0 ; i --) {
192
- char ch = this .input .charAt (i );
193
- if (ch == ' ' || isC0Control (ch )) {
194
- if (validate ()) {
195
- // If input contains any (leading or) trailing C0 control or space, invalid-URL-unit validation error.
196
- validationError ("Code point \" " + ch + "\" is not a URL unit." );
203
+ if (removeC0ControlOrSpace ) {
204
+ for (int i = this .input .length () - 1 ; i >= 0 ; i --) {
205
+ char c = this .input .charAt (i );
206
+ if (c == ' ' || isC0Control (c )) {
207
+ if (validate ()) {
208
+ // If input contains any (leading or) trailing C0 control or space, invalid-URL-unit validation error.
209
+ validationError ("Code point \" " + c + "\" is not a URL unit." );
210
+ }
211
+ // Remove any trailing C0 control or space from input.
212
+ this .input .deleteCharAt (i );
213
+ }
214
+ else {
215
+ break ;
197
216
}
198
- // Remove any (leading and) trailing C0 control or space from input.
199
- this .input .deleteCharAt (i );
200
- }
201
- else {
202
- break ;
203
217
}
204
218
}
205
219
}
@@ -377,7 +391,7 @@ private void emptyBuffer() {
377
391
}
378
392
379
393
private int remaining (int deltaPos ) {
380
- int pos = this .pointer + deltaPos ;
394
+ int pos = this .pointer + deltaPos + 1 ;
381
395
if (pos < this .input .length ()) {
382
396
return this .input .charAt (pos );
383
397
}
@@ -460,10 +474,43 @@ else if (len == 7) {
460
474
}
461
475
462
476
463
- private static boolean isWindowsDriveLetter (CharSequence s , boolean normalized ) {
464
- if (s .length () != 2 ) {
477
+ /**
478
+ * A Windows drive letter is two code points, of which the first is an ASCII alpha and the second is either U+003A (:) or U+007C (|).
479
+ *
480
+ * A normalized Windows drive letter is a Windows drive letter of which the second code point is U+003A (:).
481
+ */
482
+ private static boolean isWindowsDriveLetter (CharSequence input , boolean normalized ) {
483
+ if (input .length () != 2 ) {
465
484
return false ;
466
485
}
486
+ return isWindowsDriveLetterInternal (input , normalized );
487
+ }
488
+
489
+ /**
490
+ * A string starts with a Windows drive letter if all of the following are true:
491
+ *
492
+ * its length is greater than or equal to 2
493
+ * its first two code points are a Windows drive letter
494
+ * its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
495
+ */
496
+ private static boolean startsWithWindowsDriveLetter (CharSequence input ) {
497
+ int len = input .length ();
498
+ if (len < 2 ) {
499
+ return false ;
500
+ }
501
+ if (!isWindowsDriveLetterInternal (input , false )) {
502
+ return false ;
503
+ }
504
+ if (len == 2 ) {
505
+ return true ;
506
+ }
507
+ else {
508
+ char ch2 = input .charAt (2 );
509
+ return ch2 == '/' || ch2 == '\\' || ch2 == '?' || ch2 == '#' ;
510
+ }
511
+ }
512
+
513
+ private static boolean isWindowsDriveLetterInternal (CharSequence s , boolean normalized ) {
467
514
char ch0 = s .charAt (0 );
468
515
if (!isAsciiAlpha (ch0 )) {
469
516
return false ;
@@ -553,6 +600,7 @@ else if (c == ':') {
553
600
intPort .value () == defaultPort (url .scheme )) {
554
601
url .port = null ;
555
602
// Return.
603
+ p .stopMainLoop = true ;
556
604
return ;
557
605
}
558
606
}
@@ -561,7 +609,7 @@ else if (c == ':') {
561
609
// If url’s scheme is "file", then:
562
610
if (url .scheme .equals ("file" )) {
563
611
// If remaining does not start with "//", special-scheme-missing-following-solidus validation error.
564
- if (p .validate () && p .remaining (0 ) != '/' && p .remaining (1 ) != '/' ) {
612
+ if (p .validate () && ( p .remaining (0 ) != '/' || p .remaining (1 ) != '/' ) ) {
565
613
p .validationError ("\" file\" scheme not followed by \" //\" ." );
566
614
}
567
615
// Set state to file state.
@@ -636,7 +684,7 @@ else if (!"file".equals(p.base.scheme())) {
636
684
@ Override
637
685
public void handle (int c , UrlRecord url , UrlParser p ) {
638
686
// If c is U+002F (/) and remaining starts with U+002F (/), then set state to special authority ignore slashes state and increase pointer by 1.
639
- if (c == '/' && p .remaining (1 ) == '/' ) {
687
+ if (c == '/' && p .remaining (0 ) == '/' ) {
640
688
p .setState (SPECIAL_AUTHORITY_IGNORE_SLASHES );
641
689
p .pointer ++;
642
690
}
@@ -865,6 +913,7 @@ else if (c == ':' && !p.insideBrackets) {
865
913
}
866
914
// If state override is given and state override is hostname state, then return.
867
915
if (p .stateOverride == HOST ) {
916
+ p .stopMainLoop = true ;
868
917
return ;
869
918
}
870
919
// Let host be the result of host parsing buffer with url is not special.
@@ -888,6 +937,7 @@ else if ( (c == EOF || c == '/' || c == '?' || c == '#') ||
888
937
// Otherwise, if state override is given, buffer is the empty string, and either url includes credentials or url’s port is non-null, return.
889
938
else if (p .stateOverride != null && p .buffer .isEmpty () &&
890
939
(url .includesCredentials () || url .port () != null )) {
940
+ p .stopMainLoop = true ;
891
941
return ;
892
942
}
893
943
// EXTRA: if buffer is not empty
@@ -904,6 +954,7 @@ else if (p.stateOverride != null && p.buffer.isEmpty() &&
904
954
p .setState (PATH_START );
905
955
// If state override is given, then return.
906
956
if (p .stateOverride != null ) {
957
+ p .stopMainLoop = true ;
907
958
return ;
908
959
}
909
960
}
@@ -974,6 +1025,7 @@ else if (c == EOF || c == '/' || c == '?' || c == '#' ||
974
1025
}
975
1026
// If state override is given, then return.
976
1027
if (p .stateOverride != null ) {
1028
+ p .stopMainLoop = true ;
977
1029
return ;
978
1030
}
979
1031
// Set state to path start state and decrease pointer by 1.
@@ -1023,8 +1075,8 @@ else if (c != EOF) {
1023
1075
// Set url’s query to null.
1024
1076
url .query = null ;
1025
1077
// If the code point substring from pointer to the end of input does not start with a Windows drive letter, then shorten url’s path.
1026
- String substring = p .input .substring (p .pointer , Math . min ( p . pointer + 2 , p . input . length ()) );
1027
- if (!isWindowsDriveLetter (substring , false )) {
1078
+ String substring = p .input .substring (p .pointer );
1079
+ if (!startsWithWindowsDriveLetter (substring )) {
1028
1080
url .shortenPath ();
1029
1081
}
1030
1082
// Otherwise:
@@ -1068,11 +1120,11 @@ public void handle(int c, UrlRecord url, UrlParser p) {
1068
1120
// Set url’s host to base’s host.
1069
1121
url .host = p .base .host ;
1070
1122
// If the code point substring from pointer to the end of input does not start with a Windows drive letter and base’s path[0] is a normalized Windows drive letter, then append base’s path[0] to url’s path.
1071
- String substring = p .input .substring (p .pointer , Math . min ( p . pointer + 2 , p . input . length ()) );
1072
- if (!isWindowsDriveLetter (substring , false ) &&
1123
+ String substring = p .input .substring (p .pointer );
1124
+ if (!startsWithWindowsDriveLetter (substring ) &&
1073
1125
p .base .path instanceof PathSegments basePath &&
1074
1126
!basePath .isEmpty () &&
1075
- isWindowsDriveLetter (basePath .get (0 ), false )) {
1127
+ isWindowsDriveLetter (basePath .get (0 ), true )) {
1076
1128
url .path .append (basePath .get (0 ));
1077
1129
}
1078
1130
}
@@ -1099,6 +1151,7 @@ else if (p.buffer.isEmpty()) {
1099
1151
url .host = EmptyHost .INSTANCE ;
1100
1152
// If state override is given, then return.
1101
1153
if (p .stateOverride != null ) {
1154
+ p .stopMainLoop = true ;
1102
1155
return ;
1103
1156
}
1104
1157
// Set state to path start state.
@@ -1116,6 +1169,7 @@ else if (p.buffer.isEmpty()) {
1116
1169
url .host = host ;
1117
1170
// If state override is given, then return.
1118
1171
if (p .stateOverride != null ) {
1172
+ p .stopMainLoop = true ;
1119
1173
return ;
1120
1174
}
1121
1175
// Set buffer to the empty string and state to path start state.
@@ -1186,7 +1240,7 @@ public void handle(int c, UrlRecord url, UrlParser p) {
1186
1240
// - state override is not given and c is U+003F (?) or U+0023 (#)
1187
1241
// then:
1188
1242
if (c == EOF || c == '/' ||
1189
- url .isSpecial () && c == '\\' ||
1243
+ ( url .isSpecial () && c == '\\' ) ||
1190
1244
(p .stateOverride == null && (c == '?' || c == '#' ))) {
1191
1245
// If url is special and c is U+005C (\), invalid-reverse-solidus validation error.
1192
1246
if (p .validate () && url .isSpecial () && c == '\\' ) {
@@ -1238,7 +1292,7 @@ else if (p.previousState != URL_TEMPLATE && c == '{') {
1238
1292
p .append (c );
1239
1293
p .setState (URL_TEMPLATE );
1240
1294
}
1241
- // Otherwise, basicUrlParser these steps:
1295
+ // Otherwise, run these steps:
1242
1296
else {
1243
1297
if (p .validate ()) {
1244
1298
// If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
0 commit comments