Skip to content

Commit 0c12c20

Browse files
authored
Merge branch 'main' into dynamic-build-rules
2 parents b6c2c3e + c649df6 commit 0c12c20

25 files changed

+1978
-1609
lines changed

Doc/library/urllib.parse.rst

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,10 @@ or on combining URL components into a URL string.
159159
ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html',
160160
params='', query='', fragment='')
161161

162+
.. warning::
163+
164+
:func:`urlparse` does not perform validation. See :ref:`URL parsing
165+
security <url-parsing-security>` for details.
162166

163167
.. versionchanged:: 3.2
164168
Added IPv6 URL parsing capabilities.
@@ -324,8 +328,14 @@ or on combining URL components into a URL string.
324328
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
325329
decomposed before parsing, no error will be raised.
326330

327-
Following the `WHATWG spec`_ that updates RFC 3986, ASCII newline
328-
``\n``, ``\r`` and tab ``\t`` characters are stripped from the URL.
331+
Following some of the `WHATWG spec`_ that updates RFC 3986, leading C0
332+
control and space characters are stripped from the URL. ``\n``,
333+
``\r`` and tab ``\t`` characters are removed from the URL at any position.
334+
335+
.. warning::
336+
337+
:func:`urlsplit` does not perform validation. See :ref:`URL parsing
338+
security <url-parsing-security>` for details.
329339

330340
.. versionchanged:: 3.6
331341
Out-of-range port numbers now raise :exc:`ValueError`, instead of
@@ -338,6 +348,9 @@ or on combining URL components into a URL string.
338348
.. versionchanged:: 3.10
339349
ASCII newline and tab characters are stripped from the URL.
340350

351+
.. versionchanged:: 3.12
352+
Leading WHATWG C0 control and space characters are stripped from the URL.
353+
341354
.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
342355

343356
.. function:: urlunsplit(parts)
@@ -414,6 +427,35 @@ or on combining URL components into a URL string.
414427
or ``scheme://host/path``). If *url* is not a wrapped URL, it is returned
415428
without changes.
416429

430+
.. _url-parsing-security:
431+
432+
URL parsing security
433+
--------------------
434+
435+
The :func:`urlsplit` and :func:`urlparse` APIs do not perform **validation** of
436+
inputs. They may not raise errors on inputs that other applications consider
437+
invalid. They may also succeed on some inputs that might not be considered
438+
URLs elsewhere. Their purpose is for practical functionality rather than
439+
purity.
440+
441+
Rather than raising an exception on unusual input, they may instead return some
442+
component parts as empty strings. Or components may contain more than perhaps
443+
they should.
444+
445+
We recommend that users of these APIs code defensively wherever the returned
446+
values may be used with security implications. Do some verification within your
447+
code before trusting a returned component part. Does that ``scheme`` make
448+
sense? Is that a sensible ``path``? Is there anything strange about that
449+
``hostname``? etc.
450+
451+
What constitutes a URL is not universally well defined. Different applications
452+
have different needs and desired constraints. For instance the living `WHATWG
453+
spec`_ describes what user-facing web clients such as a web browser require,
454+
while :rfc:`3986` is more general. These functions incorporate some aspects of
455+
both, but cannot be claimed compliant with either. The APIs and existing user
456+
code with expectations on specific behaviors predate both standards leading us
457+
to be very cautious about making API behavior changes.
458+
417459
.. _parsing-ascii-encoded-bytes:
418460

419461
Parsing ASCII Encoded Bytes

Include/internal/pycore_pylifecycle.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);
3131

3232
extern void _Py_InitVersion(void);
3333
extern PyStatus _PyFaulthandler_Init(int enable);
34-
extern int _PyTraceMalloc_Init(int enable);
3534
extern PyObject * _PyBuiltin_Init(PyInterpreterState *interp);
3635
extern PyStatus _PySys_Create(
3736
PyThreadState *tstate,

Include/tracemalloc.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,40 @@ PyAPI_FUNC(int) PyTraceMalloc_Untrack(
3333
PyAPI_FUNC(PyObject*) _PyTraceMalloc_GetTraceback(
3434
unsigned int domain,
3535
uintptr_t ptr);
36+
37+
/* Return non-zero if tracemalloc is tracing */
38+
PyAPI_FUNC(int) _PyTraceMalloc_IsTracing(void);
39+
40+
/* Clear the tracemalloc traces */
41+
PyAPI_FUNC(void) _PyTraceMalloc_ClearTraces(void);
42+
43+
/* Get the tracemalloc traces */
44+
PyAPI_FUNC(PyObject *) _PyTraceMalloc_GetTraces(void);
45+
46+
/* Get the tracemalloc traceback for an object */
47+
PyAPI_FUNC(PyObject *) _PyTraceMalloc_GetObjectTraceback(PyObject *obj);
48+
49+
/* Initialize tracemalloc */
50+
PyAPI_FUNC(int) _PyTraceMalloc_Init(void);
51+
52+
/* Start tracemalloc */
53+
PyAPI_FUNC(int) _PyTraceMalloc_Start(int max_nframe);
54+
55+
/* Stop tracemalloc */
56+
PyAPI_FUNC(void) _PyTraceMalloc_Stop(void);
57+
58+
/* Get the tracemalloc traceback limit */
59+
PyAPI_FUNC(int) _PyTraceMalloc_GetTracebackLimit(void);
60+
61+
/* Get the memory usage of tracemalloc in bytes */
62+
PyAPI_FUNC(size_t) _PyTraceMalloc_GetMemory(void);
63+
64+
/* Get the current size and peak size of traced memory blocks as a 2-tuple */
65+
PyAPI_FUNC(PyObject *) _PyTraceMalloc_GetTracedMemory(void);
66+
67+
/* Set the peak size of traced memory blocks to the current size */
68+
PyAPI_FUNC(void) _PyTraceMalloc_ResetPeak(void);
69+
3670
#endif
3771

3872
#endif /* !Py_TRACEMALLOC_H */

Lib/test/test_syntax.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,6 +1877,68 @@ def f(x: *b)
18771877
^^^^^^^^^^^
18781878
SyntaxError: bytes can only contain ASCII literal characters
18791879
1880+
Invalid expressions in type scopes:
1881+
1882+
>>> type A[T: (x:=3)] = int
1883+
Traceback (most recent call last):
1884+
...
1885+
SyntaxError: named expression cannot be used within a TypeVar bound
1886+
1887+
>>> type A[T: (yield 3)] = int
1888+
Traceback (most recent call last):
1889+
...
1890+
SyntaxError: yield expression cannot be used within a TypeVar bound
1891+
1892+
>>> type A[T: (await 3)] = int
1893+
Traceback (most recent call last):
1894+
...
1895+
SyntaxError: await expression cannot be used within a TypeVar bound
1896+
1897+
>>> type A[T: (yield from [])] = int
1898+
Traceback (most recent call last):
1899+
...
1900+
SyntaxError: yield expression cannot be used within a TypeVar bound
1901+
1902+
>>> type A = (x := 3)
1903+
Traceback (most recent call last):
1904+
...
1905+
SyntaxError: named expression cannot be used within a type alias
1906+
1907+
>>> type A = (yield 3)
1908+
Traceback (most recent call last):
1909+
...
1910+
SyntaxError: yield expression cannot be used within a type alias
1911+
1912+
>>> type A = (await 3)
1913+
Traceback (most recent call last):
1914+
...
1915+
SyntaxError: await expression cannot be used within a type alias
1916+
1917+
>>> type A = (yield from [])
1918+
Traceback (most recent call last):
1919+
...
1920+
SyntaxError: yield expression cannot be used within a type alias
1921+
1922+
>>> class A[T]((x := 3)): ...
1923+
Traceback (most recent call last):
1924+
...
1925+
SyntaxError: named expression cannot be used within the definition of a generic
1926+
1927+
>>> class A[T]((yield 3)): ...
1928+
Traceback (most recent call last):
1929+
...
1930+
SyntaxError: yield expression cannot be used within the definition of a generic
1931+
1932+
>>> class A[T]((await 3)): ...
1933+
Traceback (most recent call last):
1934+
...
1935+
SyntaxError: await expression cannot be used within the definition of a generic
1936+
1937+
>>> class A[T]((yield from [])): ...
1938+
Traceback (most recent call last):
1939+
...
1940+
SyntaxError: yield expression cannot be used within the definition of a generic
1941+
18801942
"""
18811943

18821944
import re

Lib/test/test_type_params.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -425,11 +425,11 @@ class Foo[T: Foo, U: (Foo, Foo)]:
425425
type_params = Foo.__type_params__
426426
self.assertEqual(len(type_params), 2)
427427
self.assertEqual(type_params[0].__name__, "T")
428-
self.assertEqual(type_params[0].__bound__, Foo)
429-
self.assertEqual(type_params[0].__constraints__, None)
428+
self.assertIs(type_params[0].__bound__, Foo)
429+
self.assertEqual(type_params[0].__constraints__, ())
430430

431431
self.assertEqual(type_params[1].__name__, "U")
432-
self.assertEqual(type_params[1].__bound__, None)
432+
self.assertIs(type_params[1].__bound__, None)
433433
self.assertEqual(type_params[1].__constraints__, (Foo, Foo))
434434

435435
def test_evaluation_error(self):
@@ -439,16 +439,16 @@ class Foo[T: Undefined, U: (Undefined,)]:
439439
type_params = Foo.__type_params__
440440
with self.assertRaises(NameError):
441441
type_params[0].__bound__
442-
self.assertEqual(type_params[0].__constraints__, None)
443-
self.assertEqual(type_params[1].__bound__, None)
442+
self.assertEqual(type_params[0].__constraints__, ())
443+
self.assertIs(type_params[1].__bound__, None)
444444
with self.assertRaises(NameError):
445445
type_params[1].__constraints__
446446

447447
Undefined = "defined"
448448
self.assertEqual(type_params[0].__bound__, "defined")
449-
self.assertEqual(type_params[0].__constraints__, None)
449+
self.assertEqual(type_params[0].__constraints__, ())
450450

451-
self.assertEqual(type_params[1].__bound__, None)
451+
self.assertIs(type_params[1].__bound__, None)
452452
self.assertEqual(type_params[1].__constraints__, ("defined",))
453453

454454

Lib/test/test_typing.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,41 @@ def test_basic_plain(self):
367367
self.assertEqual(T, T)
368368
# T is an instance of TypeVar
369369
self.assertIsInstance(T, TypeVar)
370+
self.assertEqual(T.__name__, 'T')
371+
self.assertEqual(T.__constraints__, ())
372+
self.assertIs(T.__bound__, None)
373+
self.assertIs(T.__covariant__, False)
374+
self.assertIs(T.__contravariant__, False)
375+
self.assertIs(T.__infer_variance__, False)
376+
377+
def test_attributes(self):
378+
T_bound = TypeVar('T_bound', bound=int)
379+
self.assertEqual(T_bound.__name__, 'T_bound')
380+
self.assertEqual(T_bound.__constraints__, ())
381+
self.assertIs(T_bound.__bound__, int)
382+
383+
T_constraints = TypeVar('T_constraints', int, str)
384+
self.assertEqual(T_constraints.__name__, 'T_constraints')
385+
self.assertEqual(T_constraints.__constraints__, (int, str))
386+
self.assertIs(T_constraints.__bound__, None)
387+
388+
T_co = TypeVar('T_co', covariant=True)
389+
self.assertEqual(T_co.__name__, 'T_co')
390+
self.assertIs(T_co.__covariant__, True)
391+
self.assertIs(T_co.__contravariant__, False)
392+
self.assertIs(T_co.__infer_variance__, False)
393+
394+
T_contra = TypeVar('T_contra', contravariant=True)
395+
self.assertEqual(T_contra.__name__, 'T_contra')
396+
self.assertIs(T_contra.__covariant__, False)
397+
self.assertIs(T_contra.__contravariant__, True)
398+
self.assertIs(T_contra.__infer_variance__, False)
399+
400+
T_infer = TypeVar('T_infer', infer_variance=True)
401+
self.assertEqual(T_infer.__name__, 'T_infer')
402+
self.assertIs(T_infer.__covariant__, False)
403+
self.assertIs(T_infer.__contravariant__, False)
404+
self.assertIs(T_infer.__infer_variance__, True)
370405

371406
def test_typevar_instance_type_error(self):
372407
T = TypeVar('T')
@@ -458,6 +493,12 @@ def test_no_bivariant(self):
458493
with self.assertRaises(ValueError):
459494
TypeVar('T', covariant=True, contravariant=True)
460495

496+
def test_cannot_combine_explicit_and_infer(self):
497+
with self.assertRaises(ValueError):
498+
TypeVar('T', covariant=True, infer_variance=True)
499+
with self.assertRaises(ValueError):
500+
TypeVar('T', contravariant=True, infer_variance=True)
501+
461502
def test_var_substitution(self):
462503
T = TypeVar('T')
463504
subst = T.__typing_subst__
@@ -7812,6 +7853,7 @@ def test_basic_plain(self):
78127853
P = ParamSpec('P')
78137854
self.assertEqual(P, P)
78147855
self.assertIsInstance(P, ParamSpec)
7856+
self.assertEqual(P.__name__, 'P')
78157857

78167858
def test_valid_uses(self):
78177859
P = ParamSpec('P')

Lib/test/test_urlparse.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -654,14 +654,73 @@ def test_urlsplit_remove_unsafe_bytes(self):
654654
self.assertEqual(p.scheme, "http")
655655
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
656656

657+
def test_urlsplit_strip_url(self):
658+
noise = bytes(range(0, 0x20 + 1))
659+
base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
660+
661+
url = noise.decode("utf-8") + base_url
662+
p = urllib.parse.urlsplit(url)
663+
self.assertEqual(p.scheme, "http")
664+
self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
665+
self.assertEqual(p.path, "/doc/")
666+
self.assertEqual(p.query, "query=yes")
667+
self.assertEqual(p.fragment, "frag")
668+
self.assertEqual(p.username, "User")
669+
self.assertEqual(p.password, "Pass")
670+
self.assertEqual(p.hostname, "www.python.org")
671+
self.assertEqual(p.port, 80)
672+
self.assertEqual(p.geturl(), base_url)
673+
674+
url = noise + base_url.encode("utf-8")
675+
p = urllib.parse.urlsplit(url)
676+
self.assertEqual(p.scheme, b"http")
677+
self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
678+
self.assertEqual(p.path, b"/doc/")
679+
self.assertEqual(p.query, b"query=yes")
680+
self.assertEqual(p.fragment, b"frag")
681+
self.assertEqual(p.username, b"User")
682+
self.assertEqual(p.password, b"Pass")
683+
self.assertEqual(p.hostname, b"www.python.org")
684+
self.assertEqual(p.port, 80)
685+
self.assertEqual(p.geturl(), base_url.encode("utf-8"))
686+
687+
# Test that trailing space is preserved as some applications rely on
688+
# this within query strings.
689+
query_spaces_url = "https://www.python.org:88/doc/?query= "
690+
p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
691+
self.assertEqual(p.scheme, "https")
692+
self.assertEqual(p.netloc, "www.python.org:88")
693+
self.assertEqual(p.path, "/doc/")
694+
self.assertEqual(p.query, "query= ")
695+
self.assertEqual(p.port, 88)
696+
self.assertEqual(p.geturl(), query_spaces_url)
697+
698+
p = urllib.parse.urlsplit("www.pypi.org ")
699+
# That "hostname" gets considered a "path" due to the
700+
# trailing space and our existing logic... YUCK...
701+
# and re-assembles via geturl aka urlunsplit into the original.
702+
# django.core.validators.URLValidator (at least through v3.2) relies on
703+
# this, for better or worse, to catch it in a ValidationError via its
704+
# regular expressions.
705+
# Here we test the basic round trip concept of such a trailing space.
706+
self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
707+
708+
# with scheme as cache-key
709+
url = "//www.python.org/"
710+
scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
711+
for _ in range(2):
712+
p = urllib.parse.urlsplit(url, scheme=scheme)
713+
self.assertEqual(p.scheme, "https")
714+
self.assertEqual(p.geturl(), "https://www.python.org/")
715+
657716
def test_attributes_bad_port(self):
658717
"""Check handling of invalid ports."""
659718
for bytes in (False, True):
660719
for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
661720
for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
662721
with self.subTest(bytes=bytes, parse=parse, port=port):
663722
netloc = "www.example.net:" + port
664-
url = "http://" + netloc
723+
url = "http://" + netloc + "/"
665724
if bytes:
666725
if netloc.isascii() and port.isascii():
667726
netloc = netloc.encode("ascii")

0 commit comments

Comments
 (0)