Skip to content

Commit a81849b

Browse files
authored
bpo-39939: Add str.removeprefix and str.removesuffix (GH-18939)
Added str.removeprefix and str.removesuffix methods and corresponding bytes, bytearray, and collections.UserString methods to remove affixes from a string if present. See PEP 616 for a full description.
1 parent 39652cd commit a81849b

13 files changed

+597
-6
lines changed

Doc/library/stdtypes.rst

+102-2
Original file line numberDiff line numberDiff line change
@@ -1549,6 +1549,33 @@ expression support in the :mod:`re` module).
15491549
interpreted as in slice notation.
15501550

15511551

1552+
.. method:: str.removeprefix(prefix, /)
1553+
1554+
If the string starts with the *prefix* string, return
1555+
``string[len(prefix):]``. Otherwise, return a copy of the original
1556+
string::
1557+
1558+
>>> 'TestHook'.removeprefix('Test')
1559+
'Hook'
1560+
>>> 'BaseTestCase'.removeprefix('Test')
1561+
'BaseTestCase'
1562+
1563+
.. versionadded:: 3.9
1564+
1565+
.. method:: str.removesuffix(suffix, /)
1566+
1567+
If the string ends with the *suffix* string and that *suffix* is not empty,
1568+
return ``string[:-len(suffix)]``. Otherwise, return a copy of the
1569+
original string::
1570+
1571+
>>> 'MiscTests'.removesuffix('Tests')
1572+
'Misc'
1573+
>>> 'TmpDirMixin'.removesuffix('Tests')
1574+
'TmpDirMixin'
1575+
1576+
.. versionadded:: 3.9
1577+
1578+
15521579
.. method:: str.encode(encoding="utf-8", errors="strict")
15531580

15541581
Return an encoded version of the string as a bytes object. Default encoding
@@ -1831,6 +1858,14 @@ expression support in the :mod:`re` module).
18311858
>>> 'www.example.com'.lstrip('cmowz.')
18321859
'example.com'
18331860

1861+
See :meth:`str.removeprefix` for a method that will remove a single prefix
1862+
string rather than all of a set of characters. For example::
1863+
1864+
>>> 'Arthur: three!'.lstrip('Arthur: ')
1865+
'ee!'
1866+
>>> 'Arthur: three!'.removeprefix('Arthur: ')
1867+
'three!'
1868+
18341869

18351870
.. staticmethod:: str.maketrans(x[, y[, z]])
18361871

@@ -1911,6 +1946,13 @@ expression support in the :mod:`re` module).
19111946
>>> 'mississippi'.rstrip('ipz')
19121947
'mississ'
19131948

1949+
See :meth:`str.removesuffix` for a method that will remove a single suffix
1950+
string rather than all of a set of characters. For example::
1951+
1952+
>>> 'Monty Python'.rstrip(' Python')
1953+
'M'
1954+
>>> 'Monty Python'.removesuffix(' Python')
1955+
'Monty'
19141956

19151957
.. method:: str.split(sep=None, maxsplit=-1)
19161958

@@ -2591,6 +2633,50 @@ arbitrary binary data.
25912633
Also accept an integer in the range 0 to 255 as the subsequence.
25922634

25932635

2636+
.. method:: bytes.removeprefix(prefix, /)
2637+
bytearray.removeprefix(prefix, /)
2638+
2639+
If the binary data starts with the *prefix* string, return
2640+
``bytes[len(prefix):]``. Otherwise, return a copy of the original
2641+
binary data::
2642+
2643+
>>> b'TestHook'.removeprefix(b'Test')
2644+
b'Hook'
2645+
>>> b'BaseTestCase'.removeprefix(b'Test')
2646+
b'BaseTestCase'
2647+
2648+
The *prefix* may be any :term:`bytes-like object`.
2649+
2650+
.. note::
2651+
2652+
The bytearray version of this method does *not* operate in place -
2653+
it always produces a new object, even if no changes were made.
2654+
2655+
.. versionadded:: 3.9
2656+
2657+
2658+
.. method:: bytes.removesuffix(suffix, /)
2659+
bytearray.removesuffix(suffix, /)
2660+
2661+
If the binary data ends with the *suffix* string and that *suffix* is
2662+
not empty, return ``bytes[:-len(suffix)]``. Otherwise, return a copy of
2663+
the original binary data::
2664+
2665+
>>> b'MiscTests'.removesuffix(b'Tests')
2666+
b'Misc'
2667+
>>> b'TmpDirMixin'.removesuffix(b'Tests')
2668+
b'TmpDirMixin'
2669+
2670+
The *suffix* may be any :term:`bytes-like object`.
2671+
2672+
.. note::
2673+
2674+
The bytearray version of this method does *not* operate in place -
2675+
it always produces a new object, even if no changes were made.
2676+
2677+
.. versionadded:: 3.9
2678+
2679+
25942680
.. method:: bytes.decode(encoding="utf-8", errors="strict")
25952681
bytearray.decode(encoding="utf-8", errors="strict")
25962682

@@ -2841,7 +2927,14 @@ produce new objects.
28412927
b'example.com'
28422928

28432929
The binary sequence of byte values to remove may be any
2844-
:term:`bytes-like object`.
2930+
:term:`bytes-like object`. See :meth:`~bytes.removeprefix` for a method
2931+
that will remove a single prefix string rather than all of a set of
2932+
characters. For example::
2933+
2934+
>>> b'Arthur: three!'.lstrip(b'Arthur: ')
2935+
b'ee!'
2936+
>>> b'Arthur: three!'.removeprefix(b'Arthur: ')
2937+
b'three!'
28452938

28462939
.. note::
28472940

@@ -2890,7 +2983,14 @@ produce new objects.
28902983
b'mississ'
28912984

28922985
The binary sequence of byte values to remove may be any
2893-
:term:`bytes-like object`.
2986+
:term:`bytes-like object`. See :meth:`~bytes.removesuffix` for a method
2987+
that will remove a single suffix string rather than all of a set of
2988+
characters. For example::
2989+
2990+
>>> b'Monty Python'.rstrip(b' Python')
2991+
b'M'
2992+
>>> b'Monty Python'.removesuffix(b' Python')
2993+
b'Monty'
28942994

28952995
.. note::
28962996

Doc/whatsnew/3.9.rst

+10
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,16 @@ Merge (``|``) and update (``|=``) operators have been added to the built-in
105105
:class:`dict` class. See :pep:`584` for a full description.
106106
(Contributed by Brandt Bucher in :issue:`36144`.)
107107

108+
PEP 616: New removeprefix() and removesuffix() string methods
109+
-------------------------------------------------------------
110+
111+
:meth:`str.removeprefix(prefix)<str.removeprefix>` and
112+
:meth:`str.removesuffix(suffix)<str.removesuffix>` have been added
113+
to easily remove an unneeded prefix or a suffix from a string. Corresponding
114+
``bytes``, ``bytearray``, and ``collections.UserString`` methods have also been
115+
added. See :pep:`616` for a full description. (Contributed by Dennis Sweeney in
116+
:issue:`39939`.)
117+
108118

109119
Other Language Changes
110120
======================

Lib/collections/__init__.py

+8
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,14 @@ def count(self, sub, start=0, end=_sys.maxsize):
12391239
if isinstance(sub, UserString):
12401240
sub = sub.data
12411241
return self.data.count(sub, start, end)
1242+
def removeprefix(self, prefix, /):
1243+
if isinstance(prefix, UserString):
1244+
prefix = prefix.data
1245+
return self.__class__(self.data.removeprefix(prefix))
1246+
def removesuffix(self, suffix, /):
1247+
if isinstance(suffix, UserString):
1248+
suffix = suffix.data
1249+
return self.__class__(self.data.removesuffix(suffix))
12421250
def encode(self, encoding='utf-8', errors='strict'):
12431251
encoding = 'utf-8' if encoding is None else encoding
12441252
errors = 'strict' if errors is None else errors

Lib/test/string_tests.py

+36
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,42 @@ def test_replace_overflow(self):
682682
self.checkraises(OverflowError, A2_16, "replace", "A", A2_16)
683683
self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16)
684684

685+
def test_removeprefix(self):
686+
self.checkequal('am', 'spam', 'removeprefix', 'sp')
687+
self.checkequal('spamspam', 'spamspamspam', 'removeprefix', 'spam')
688+
self.checkequal('spam', 'spam', 'removeprefix', 'python')
689+
self.checkequal('spam', 'spam', 'removeprefix', 'spider')
690+
self.checkequal('spam', 'spam', 'removeprefix', 'spam and eggs')
691+
692+
self.checkequal('', '', 'removeprefix', '')
693+
self.checkequal('', '', 'removeprefix', 'abcde')
694+
self.checkequal('abcde', 'abcde', 'removeprefix', '')
695+
self.checkequal('', 'abcde', 'removeprefix', 'abcde')
696+
697+
self.checkraises(TypeError, 'hello', 'removeprefix')
698+
self.checkraises(TypeError, 'hello', 'removeprefix', 42)
699+
self.checkraises(TypeError, 'hello', 'removeprefix', 42, 'h')
700+
self.checkraises(TypeError, 'hello', 'removeprefix', 'h', 42)
701+
self.checkraises(TypeError, 'hello', 'removeprefix', ("he", "l"))
702+
703+
def test_removesuffix(self):
704+
self.checkequal('sp', 'spam', 'removesuffix', 'am')
705+
self.checkequal('spamspam', 'spamspamspam', 'removesuffix', 'spam')
706+
self.checkequal('spam', 'spam', 'removesuffix', 'python')
707+
self.checkequal('spam', 'spam', 'removesuffix', 'blam')
708+
self.checkequal('spam', 'spam', 'removesuffix', 'eggs and spam')
709+
710+
self.checkequal('', '', 'removesuffix', '')
711+
self.checkequal('', '', 'removesuffix', 'abcde')
712+
self.checkequal('abcde', 'abcde', 'removesuffix', '')
713+
self.checkequal('', 'abcde', 'removesuffix', 'abcde')
714+
715+
self.checkraises(TypeError, 'hello', 'removesuffix')
716+
self.checkraises(TypeError, 'hello', 'removesuffix', 42)
717+
self.checkraises(TypeError, 'hello', 'removesuffix', 42, 'h')
718+
self.checkraises(TypeError, 'hello', 'removesuffix', 'h', 42)
719+
self.checkraises(TypeError, 'hello', 'removesuffix', ("lo", "l"))
720+
685721
def test_capitalize(self):
686722
self.checkequal(' hello ', ' hello ', 'capitalize')
687723
self.checkequal('Hello ', 'Hello ','capitalize')

Lib/test/test_doctest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,7 @@ def non_Python_modules(): r"""
665665
666666
>>> import builtins
667667
>>> tests = doctest.DocTestFinder().find(builtins)
668-
>>> 810 < len(tests) < 830 # approximate number of objects with docstrings
668+
>>> 816 < len(tests) < 836 # approximate number of objects with docstrings
669669
True
670670
>>> real_tests = [t for t in tests if len(t.examples) > 0]
671671
>>> len(real_tests) # objects that actually have doctests

Misc/ACKS

+1
Original file line numberDiff line numberDiff line change
@@ -1660,6 +1660,7 @@ Hisao Suzuki
16601660
Kalle Svensson
16611661
Andrew Svetlov
16621662
Paul Swartz
1663+
Dennis Sweeney
16631664
Al Sweigart
16641665
Sviatoslav Sydorenko
16651666
Thenault Sylvain
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Added str.removeprefix and str.removesuffix methods and corresponding
2+
bytes, bytearray, and collections.UserString methods to remove affixes
3+
from a string if present.
4+
See :pep:`616` for a full description.
5+
Patch by Dennis Sweeney.

Objects/bytearrayobject.c

+67
Original file line numberDiff line numberDiff line change
@@ -1181,6 +1181,71 @@ bytearray_endswith(PyByteArrayObject *self, PyObject *args)
11811181
return _Py_bytes_endswith(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
11821182
}
11831183

1184+
/*[clinic input]
1185+
bytearray.removeprefix as bytearray_removeprefix
1186+
1187+
prefix: Py_buffer
1188+
/
1189+
1190+
Return a bytearray with the given prefix string removed if present.
1191+
1192+
If the bytearray starts with the prefix string, return
1193+
bytearray[len(prefix):]. Otherwise, return a copy of the original
1194+
bytearray.
1195+
[clinic start generated code]*/
1196+
1197+
static PyObject *
1198+
bytearray_removeprefix_impl(PyByteArrayObject *self, Py_buffer *prefix)
1199+
/*[clinic end generated code: output=6cabc585e7f502e0 input=968aada38aedd262]*/
1200+
{
1201+
const char *self_start = PyByteArray_AS_STRING(self);
1202+
Py_ssize_t self_len = PyByteArray_GET_SIZE(self);
1203+
const char *prefix_start = prefix->buf;
1204+
Py_ssize_t prefix_len = prefix->len;
1205+
1206+
if (self_len >= prefix_len
1207+
&& memcmp(self_start, prefix_start, prefix_len) == 0)
1208+
{
1209+
return PyByteArray_FromStringAndSize(self_start + prefix_len,
1210+
self_len - prefix_len);
1211+
}
1212+
1213+
return PyByteArray_FromStringAndSize(self_start, self_len);
1214+
}
1215+
1216+
/*[clinic input]
1217+
bytearray.removesuffix as bytearray_removesuffix
1218+
1219+
suffix: Py_buffer
1220+
/
1221+
1222+
Return a bytearray with the given suffix string removed if present.
1223+
1224+
If the bytearray ends with the suffix string and that suffix is not
1225+
empty, return bytearray[:-len(suffix)]. Otherwise, return a copy of
1226+
the original bytearray.
1227+
[clinic start generated code]*/
1228+
1229+
static PyObject *
1230+
bytearray_removesuffix_impl(PyByteArrayObject *self, Py_buffer *suffix)
1231+
/*[clinic end generated code: output=2bc8cfb79de793d3 input=c1827e810b2f6b99]*/
1232+
{
1233+
const char *self_start = PyByteArray_AS_STRING(self);
1234+
Py_ssize_t self_len = PyByteArray_GET_SIZE(self);
1235+
const char *suffix_start = suffix->buf;
1236+
Py_ssize_t suffix_len = suffix->len;
1237+
1238+
if (self_len >= suffix_len
1239+
&& memcmp(self_start + self_len - suffix_len,
1240+
suffix_start, suffix_len) == 0)
1241+
{
1242+
return PyByteArray_FromStringAndSize(self_start,
1243+
self_len - suffix_len);
1244+
}
1245+
1246+
return PyByteArray_FromStringAndSize(self_start, self_len);
1247+
}
1248+
11841249

11851250
/*[clinic input]
11861251
bytearray.translate
@@ -2203,6 +2268,8 @@ bytearray_methods[] = {
22032268
BYTEARRAY_POP_METHODDEF
22042269
BYTEARRAY_REMOVE_METHODDEF
22052270
BYTEARRAY_REPLACE_METHODDEF
2271+
BYTEARRAY_REMOVEPREFIX_METHODDEF
2272+
BYTEARRAY_REMOVESUFFIX_METHODDEF
22062273
BYTEARRAY_REVERSE_METHODDEF
22072274
{"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, _Py_rfind__doc__},
22082275
{"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, _Py_rindex__doc__},

0 commit comments

Comments
 (0)