Skip to content

bpo-40521: Make bytes singletons per interpreter #21074

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ struct _Py_unicode_fs_codec {
_Py_error_handler error_handler;
};

/* Cache of shared bytes objects, stored in the interpreter state as its
   `bytes` member so that each interpreter has its own copies.
   A NULL entry means that object is not cached yet; users check for NULL
   before handing out a cached object. The entries are cleared by
   _PyBytes_Fini(). */
struct _Py_bytes_state {
/* Cached length-1 bytes objects, indexed by the value of the single byte. */
PyBytesObject *characters[256];
/* Cached empty bytes object. */
PyBytesObject *empty_string;
};

struct _Py_unicode_state {
struct _Py_unicode_fs_codec fs_codec;
};
Expand Down Expand Up @@ -233,6 +238,7 @@ struct _is {
*/
PyLongObject* small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];
#endif
struct _Py_bytes_state bytes;
struct _Py_unicode_state unicode;
struct _Py_float_state float_state;
/* Using a cache is very effective since typically only a single slice is
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_pylifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ extern void _PyDict_Fini(PyThreadState *tstate);
extern void _PyTuple_Fini(PyThreadState *tstate);
extern void _PyList_Fini(PyThreadState *tstate);
extern void _PySet_Fini(PyThreadState *tstate);
extern void _PyBytes_Fini(void);
extern void _PyBytes_Fini(PyThreadState *tstate);
extern void _PyFloat_Fini(PyThreadState *tstate);
extern void _PySlice_Fini(PyThreadState *tstate);
extern void _PyAsyncGen_Fini(PyThreadState *tstate);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
The tuple free lists, the empty tuple singleton, the list free list, the empty
frozenset singleton, the float free list, the slice cache, the dict free lists,
the frame free list, the asynchronous generator free lists, and the context
free list are no longer shared by all interpreters: each interpreter now has
its own free lists and caches.
Each interpreter now has its own free lists, singletons and caches:

* Free lists: float, tuple, list, dict, frame, context,
asynchronous generator.
* Singletons: empty tuple, empty frozenset, empty bytes string,
single byte character.
* Slice cache.

They are no longer shared by all interpreters.
82 changes: 55 additions & 27 deletions Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ class bytes "PyBytesObject *" "&PyBytes_Type"

#include "clinic/bytesobject.c.h"

static PyBytesObject *characters[UCHAR_MAX + 1];
static PyBytesObject *nullstring;

_Py_IDENTIFIER(__bytes__);

/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
Expand All @@ -35,6 +32,15 @@ _Py_IDENTIFIER(__bytes__);
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
char *str);


static struct _Py_bytes_state*
get_bytes_state(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
return &interp->bytes;
}


/*
For PyBytes_FromString(), the parameter `str' points to a null-terminated
string containing exactly `size' bytes.
Expand Down Expand Up @@ -63,9 +69,13 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
PyBytesObject *op;
assert(size >= 0);

if (size == 0 && (op = nullstring) != NULL) {
Py_INCREF(op);
return (PyObject *)op;
if (size == 0) {
struct _Py_bytes_state *state = get_bytes_state();
op = state->empty_string;
if (op != NULL) {
Py_INCREF(op);
return (PyObject *)op;
}
}

if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Expand All @@ -88,8 +98,9 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
op->ob_sval[size] = '\0';
/* empty byte string singleton */
if (size == 0) {
nullstring = op;
struct _Py_bytes_state *state = get_bytes_state();
Py_INCREF(op);
state->empty_string = op;
}
return (PyObject *) op;
}
Expand All @@ -103,11 +114,13 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
"Negative size passed to PyBytes_FromStringAndSize");
return NULL;
}
if (size == 1 && str != NULL &&
(op = characters[*str & UCHAR_MAX]) != NULL)
{
Py_INCREF(op);
return (PyObject *)op;
if (size == 1 && str != NULL) {
struct _Py_bytes_state *state = get_bytes_state();
op = state->characters[*str & UCHAR_MAX];
if (op != NULL) {
Py_INCREF(op);
return (PyObject *)op;
}
}

op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
Expand All @@ -119,8 +132,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
memcpy(op->ob_sval, str, size);
/* share short strings */
if (size == 1) {
characters[*str & UCHAR_MAX] = op;
struct _Py_bytes_state *state = get_bytes_state();
Py_INCREF(op);
state->characters[*str & UCHAR_MAX] = op;
}
return (PyObject *) op;
}
Expand All @@ -138,13 +152,21 @@ PyBytes_FromString(const char *str)
"byte string is too long");
return NULL;
}
if (size == 0 && (op = nullstring) != NULL) {
Py_INCREF(op);
return (PyObject *)op;

struct _Py_bytes_state *state = get_bytes_state();
if (size == 0) {
op = state->empty_string;
if (op != NULL) {
Py_INCREF(op);
return (PyObject *)op;
}
}
if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Py_INCREF(op);
return (PyObject *)op;
else if (size == 1) {
op = state->characters[*str & UCHAR_MAX];
if (op != NULL) {
Py_INCREF(op);
return (PyObject *)op;
}
}

/* Inline PyObject_NewVar */
Expand All @@ -157,11 +179,12 @@ PyBytes_FromString(const char *str)
memcpy(op->ob_sval, str, size+1);
/* share short strings */
if (size == 0) {
nullstring = op;
Py_INCREF(op);
} else if (size == 1) {
characters[*str & UCHAR_MAX] = op;
state->empty_string = op;
}
else if (size == 1) {
Py_INCREF(op);
state->characters[*str & UCHAR_MAX] = op;
}
return (PyObject *) op;
}
Expand Down Expand Up @@ -1249,6 +1272,8 @@ PyBytes_AsStringAndSize(PyObject *obj,
/* -------------------------------------------------------------------- */
/* Methods */

#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string

#include "stringlib/stringdefs.h"

#include "stringlib/fastsearch.h"
Expand All @@ -1261,6 +1286,8 @@ PyBytes_AsStringAndSize(PyObject *obj,

#include "stringlib/transmogrify.h"

#undef STRINGLIB_GET_EMPTY

PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
Expand Down Expand Up @@ -3058,12 +3085,13 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
}

void
_PyBytes_Fini(void)
_PyBytes_Fini(PyThreadState *tstate)
{
int i;
for (i = 0; i < UCHAR_MAX + 1; i++)
Py_CLEAR(characters[i]);
Py_CLEAR(nullstring);
struct _Py_bytes_state* state = &tstate->interp->bytes;
for (int i = 0; i < UCHAR_MAX + 1; i++) {
Py_CLEAR(state->characters[i]);
}
Py_CLEAR(state->empty_string);
}

/*********************** Bytes Iterator ****************************/
Expand Down
6 changes: 3 additions & 3 deletions Objects/stringlib/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ STRINGLIB_CHAR

the type used to hold a character (char or Py_UNICODE)

STRINGLIB_EMPTY
STRINGLIB_GET_EMPTY()

a PyObject representing the empty string, only to be used if
STRINGLIB_MUTABLE is 0
returns a pointer to the PyObject representing the empty string, only to be
used if STRINGLIB_MUTABLE is 0. The returned pointer must not be NULL.

Py_ssize_t STRINGLIB_LEN(PyObject*)

Expand Down
2 changes: 1 addition & 1 deletion Objects/stringlib/asciilib.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#define STRINGLIB_CHAR Py_UCS1
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_GET_EMPTY() unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
Expand Down
20 changes: 12 additions & 8 deletions Objects/stringlib/partition.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ STRINGLIB(partition)(PyObject* str_obj,
#else
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
PyObject *empty = (PyObject*)STRINGLIB_GET_EMPTY();
assert(empty != NULL);
Py_INCREF(empty);
PyTuple_SET_ITEM(out, 1, empty);
Py_INCREF(empty);
PyTuple_SET_ITEM(out, 2, empty);
#endif
return out;
}
Expand Down Expand Up @@ -90,10 +92,12 @@ STRINGLIB(rpartition)(PyObject* str_obj,
return NULL;
}
#else
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
PyObject *empty = (PyObject*)STRINGLIB_GET_EMPTY();
assert(empty != NULL);
Py_INCREF(empty);
PyTuple_SET_ITEM(out, 0, empty);
Py_INCREF(empty);
PyTuple_SET_ITEM(out, 1, empty);
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
#endif
Expand Down
5 changes: 4 additions & 1 deletion Objects/stringlib/stringdefs.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#ifndef STRINGLIB_STRINGDEFS_H
#define STRINGLIB_STRINGDEFS_H

#ifndef STRINGLIB_GET_EMPTY
# error "STRINGLIB_GET_EMPTY macro must be defined"
#endif

/* this is sort of a hack. there's at least one place (formatting
floats) where some stringlib code takes a different path if it's
compiled as unicode. */
Expand All @@ -13,7 +17,6 @@
#define STRINGLIB_CHAR char
#define STRINGLIB_TYPE_NAME "string"
#define STRINGLIB_PARSE_CODE "S"
#define STRINGLIB_EMPTY nullstring
#define STRINGLIB_ISSPACE Py_ISSPACE
#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9'))
Expand Down
2 changes: 1 addition & 1 deletion Objects/stringlib/ucs1lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#define STRINGLIB_CHAR Py_UCS1
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_GET_EMPTY() unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
Expand Down
2 changes: 1 addition & 1 deletion Objects/stringlib/ucs2lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#define STRINGLIB_CHAR Py_UCS2
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_GET_EMPTY() unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
Expand Down
2 changes: 1 addition & 1 deletion Objects/stringlib/ucs4lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#define STRINGLIB_CHAR Py_UCS4
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_GET_EMPTY() unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
Expand Down
2 changes: 1 addition & 1 deletion Objects/stringlib/unicodedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#define STRINGLIB_CHAR Py_UNICODE
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_GET_EMPTY() unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
Expand Down
4 changes: 1 addition & 3 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1262,9 +1262,7 @@ finalize_interp_types(PyThreadState *tstate, int is_main_interp)

_PySlice_Fini(tstate);

if (is_main_interp) {
_PyBytes_Fini();
}
_PyBytes_Fini(tstate);
_PyUnicode_Fini(tstate);
_PyFloat_Fini(tstate);
_PyLong_Fini(tstate);
Expand Down