@@ -666,7 +666,64 @@ def test_urlsplit_remove_unsafe_bytes(self):
666
666
self .assertEqual (p .scheme , "https" )
667
667
self .assertEqual (p .geturl (), "https://www.python.org/javascript:alert('msg')/?query=something#fragment" )
668
668
669
def test_urlsplit_strip_url(self):
    """Check how urlsplit() treats leading/trailing control chars and spaces.

    A prefix made of every character from 0x00 through 0x20 is put in
    front of a URL (and around a default scheme); the parsed result must
    match what the clean URL would produce.  Trailing spaces, on the other
    hand, must survive the split/unsplit round trip.
    """
    # Every code point from NUL up to and including the space character.
    noise = "".join(chr(code) for code in range(0x20 + 1))
    noise_text = noise.decode("utf-8")
    base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"

    # Text input: the leading noise must not leak into any component.
    p = urlparse.urlsplit(noise_text + base_url)
    text_expectations = (
        ("scheme", "http"),
        ("netloc", "User:Pass@www.python.org:080"),
        ("path", "/doc/"),
        ("query", "query=yes"),
        ("fragment", "frag"),
        ("username", "User"),
        ("password", "Pass"),
        ("hostname", "www.python.org"),
        ("port", 80),
    )
    for attribute, wanted in text_expectations:
        self.assertEqual(getattr(p, attribute), wanted)
    self.assertEqual(p.geturl(), base_url)

    # Byte-string input: same expectations, expressed as bytes.
    base_bytes = base_url.encode("utf-8")
    p = urlparse.urlsplit(noise + base_bytes)
    bytes_expectations = (
        ("scheme", b"http"),
        ("netloc", b"User:Pass@www.python.org:080"),
        ("path", b"/doc/"),
        ("query", b"query=yes"),
        ("fragment", b"frag"),
        ("username", b"User"),
        ("password", b"Pass"),
        ("hostname", b"www.python.org"),
        ("port", 80),
    )
    for attribute, wanted in bytes_expectations:
        self.assertEqual(getattr(p, attribute), wanted)
    self.assertEqual(p.geturl(), base_bytes)

    # A trailing space inside the query string has to be kept, because
    # some applications depend on it.
    query_spaces_url = "https://www.python.org:88/doc/?query= "
    p = urlparse.urlsplit(noise_text + query_spaces_url)
    query_expectations = (
        ("scheme", "https"),
        ("netloc", "www.python.org:88"),
        ("path", "/doc/"),
        ("query", "query= "),
        ("port", 88),
    )
    for attribute, wanted in query_expectations:
        self.assertEqual(getattr(p, attribute), wanted)
    self.assertEqual(p.geturl(), query_spaces_url)

    # With the trailing space, the existing logic classifies this
    # "hostname" as a path, and urlunsplit() reassembles it into the
    # original string.  django.core.validators.URLValidator (at least
    # through v3.2) relies on that, for better or worse, so its regular
    # expressions can reject the value with a ValidationError.  Only the
    # basic round trip of such a trailing space is verified here.
    p = urlparse.urlsplit("www.pypi.org ")
    self.assertEqual(urlparse.urlunsplit(p), "www.pypi.org ")

    # The default scheme is part of the cache key, so run the identical
    # call twice and expect the same stripped scheme both times.
    url = "//www.python.org/"
    scheme = noise_text + "https" + noise_text
    for _ in range(2):
        p = urlparse.urlsplit(url, scheme=scheme)
        self.assertEqual(p.scheme, "https")
        self.assertEqual(p.geturl(), "https://www.python.org/")
670
727
671
728
def test_attributes_bad_port (self ):
672
729
"""Check handling of non-integer ports."""
0 commit comments