103
103
104
104
class UrlParseTestCase (unittest .TestCase ):
105
105
106
- def checkRoundtrips (self , url , parsed , split ):
106
+ def checkRoundtrips (self , url , parsed , split , url2 = None ):
107
+ if url2 is None :
108
+ url2 = url
107
109
result = urllib .parse .urlparse (url )
108
110
self .assertSequenceEqual (result , parsed )
109
111
t = (result .scheme , result .netloc , result .path ,
110
112
result .params , result .query , result .fragment )
111
113
self .assertSequenceEqual (t , parsed )
112
114
# put it back together and it should be the same
113
115
result2 = urllib .parse .urlunparse (result )
114
- self .assertSequenceEqual (result2 , url )
116
+ self .assertSequenceEqual (result2 , url2 )
115
117
self .assertSequenceEqual (result2 , result .geturl ())
116
118
117
119
# the result of geturl() is a fixpoint; we can always parse it
@@ -137,7 +139,7 @@ def checkRoundtrips(self, url, parsed, split):
137
139
result .query , result .fragment )
138
140
self .assertSequenceEqual (t , split )
139
141
result2 = urllib .parse .urlunsplit (result )
140
- self .assertSequenceEqual (result2 , url )
142
+ self .assertSequenceEqual (result2 , url2 )
141
143
self .assertSequenceEqual (result2 , result .geturl ())
142
144
143
145
# check the fixpoint property of re-parsing the result of geturl()
@@ -175,9 +177,39 @@ def test_qs(self):
175
177
176
178
def test_roundtrips (self ):
177
179
str_cases = [
180
+ ('path/to/file' ,
181
+ ('' , '' , 'path/to/file' , '' , '' , '' ),
182
+ ('' , '' , 'path/to/file' , '' , '' )),
183
+ ('/path/to/file' ,
184
+ ('' , '' , '/path/to/file' , '' , '' , '' ),
185
+ ('' , '' , '/path/to/file' , '' , '' )),
186
+ ('//path/to/file' ,
187
+ ('' , 'path' , '/to/file' , '' , '' , '' ),
188
+ ('' , 'path' , '/to/file' , '' , '' )),
189
+ ('////path/to/file' ,
190
+ ('' , '' , '//path/to/file' , '' , '' , '' ),
191
+ ('' , '' , '//path/to/file' , '' , '' )),
192
+ ('scheme:path/to/file' ,
193
+ ('scheme' , '' , 'path/to/file' , '' , '' , '' ),
194
+ ('scheme' , '' , 'path/to/file' , '' , '' )),
195
+ ('scheme:/path/to/file' ,
196
+ ('scheme' , '' , '/path/to/file' , '' , '' , '' ),
197
+ ('scheme' , '' , '/path/to/file' , '' , '' )),
198
+ ('scheme://path/to/file' ,
199
+ ('scheme' , 'path' , '/to/file' , '' , '' , '' ),
200
+ ('scheme' , 'path' , '/to/file' , '' , '' )),
201
+ ('scheme:////path/to/file' ,
202
+ ('scheme' , '' , '//path/to/file' , '' , '' , '' ),
203
+ ('scheme' , '' , '//path/to/file' , '' , '' )),
178
204
('file:///tmp/junk.txt' ,
179
205
('file' , '' , '/tmp/junk.txt' , '' , '' , '' ),
180
206
('file' , '' , '/tmp/junk.txt' , '' , '' )),
207
+ ('file:////tmp/junk.txt' ,
208
+ ('file' , '' , '//tmp/junk.txt' , '' , '' , '' ),
209
+ ('file' , '' , '//tmp/junk.txt' , '' , '' )),
210
+ ('file://///tmp/junk.txt' ,
211
+ ('file' , '' , '///tmp/junk.txt' , '' , '' , '' ),
212
+ ('file' , '' , '///tmp/junk.txt' , '' , '' )),
181
213
('imap://mail.python.org/mbox1' ,
182
214
('imap' , 'mail.python.org' , '/mbox1' , '' , '' , '' ),
183
215
('imap' , 'mail.python.org' , '/mbox1' , '' , '' )),
@@ -213,6 +245,38 @@ def _encode(t):
213
245
for url , parsed , split in str_cases + bytes_cases :
214
246
self .checkRoundtrips (url , parsed , split )
215
247
248
def test_roundtrips_normalization(self):
    """Check URLs whose reassembled form differs from the original input.

    Each case is ``(url, url2, parsed, split)``: ``url`` is the input,
    ``url2`` is the string expected when the parsed result is put back
    together, and ``parsed`` / ``split`` are the expected 6-tuple and
    5-tuple handed to ``checkRoundtrips``.  Every case is exercised both
    as ``str`` and as ASCII-encoded ``bytes``.
    """
    str_cases = [
        ('///path/to/file',
         '/path/to/file',
         ('', '', '/path/to/file', '', '', ''),
         ('', '', '/path/to/file', '', '')),
        ('scheme:///path/to/file',
         'scheme:/path/to/file',
         ('scheme', '', '/path/to/file', '', '', ''),
         ('scheme', '', '/path/to/file', '', '')),
        ('file:/tmp/junk.txt',
         'file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('http:/tmp/junk.txt',
         'http:///tmp/junk.txt',
         ('http', '', '/tmp/junk.txt', '', '', ''),
         ('http', '', '/tmp/junk.txt', '', '')),
        ('https:/tmp/junk.txt',
         'https:///tmp/junk.txt',
         ('https', '', '/tmp/junk.txt', '', '', ''),
         ('https', '', '/tmp/junk.txt', '', '')),
    ]

    def _encode(case):
        # Build the bytes twin of a str case, field by field.
        url, url2, parsed, split = case
        return (url.encode('ascii'),
                url2.encode('ascii'),
                tuple(part.encode('ascii') for part in parsed),
                tuple(part.encode('ascii') for part in split))

    bytes_cases = [_encode(case) for case in str_cases]
    for url, url2, parsed, split in str_cases + bytes_cases:
        # subTest reports the failing URL and lets the remaining cases
        # run instead of stopping at the first failure.
        with self.subTest(url=url):
            self.checkRoundtrips(url, parsed, split, url2)
279
+
216
280
def test_http_roundtrips (self ):
217
281
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
218
282
# so we test both 'http:' and 'https:' in all the following.
0 commit comments