Skip to content

bpo-40521: Optimize PyUnicode_New(0, maxchar) #21099

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 23, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 25 additions & 55 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,28 +231,19 @@ get_unicode_state(void)


// Return a borrowed reference to the empty string singleton.
// Return NULL if the singleton was not created yet.
static inline PyObject* unicode_get_empty(void)
{
struct _Py_unicode_state *state = get_unicode_state();
// unicode_get_empty() must not be called before _PyUnicode_Init()
// or after _PyUnicode_Fini()
assert(state->empty != NULL);
return state->empty;
}

// Return a new (strong) reference to the empty string singleton.
//
// The singleton is obtained through unicode_get_empty(), so it must already
// exist when this is called. There is deliberately no lazy-creation fallback:
// PyUnicode_New(0, maxchar) is itself served by this helper, so creating the
// singleton here with PyUnicode_New(0, 0) would recurse.
//
// Never returns NULL.
static inline PyObject* unicode_new_empty(void)
{
    PyObject *empty = unicode_get_empty();
    Py_INCREF(empty);
    return empty;
}

Expand Down Expand Up @@ -696,12 +687,9 @@ unicode_result_ready(PyObject *unicode)
PyObject *empty = unicode_get_empty();
if (unicode != empty) {
Py_DECREF(unicode);

Py_INCREF(empty);
return empty;
}
// unicode is the empty string singleton
return unicode;
return empty;
}

#ifdef LATIN1_SINGLETONS
Expand Down Expand Up @@ -959,7 +947,7 @@ ensure_unicode(PyObject *obj)

/* Compilation of templated routines */

#define STRINGLIB_GET_EMPTY() unicode_get_empty()
#define STRINGLIB_GET_EMPTY() unicode_get_empty()

#include "stringlib/asciilib.h"
#include "stringlib/fastsearch.h"
Expand Down Expand Up @@ -1260,11 +1248,7 @@ _PyUnicode_New(Py_ssize_t length)

/* Optimization for empty strings */
if (length == 0) {
PyObject *empty = unicode_get_empty();
if (empty != NULL) {
Py_INCREF(empty);
return (PyUnicodeObject *)empty;
}
return (PyUnicodeObject *)unicode_new_empty();
}

/* Ensure we won't overflow the size. */
Expand Down Expand Up @@ -1416,11 +1400,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
{
/* Optimization for empty strings */
if (size == 0) {
PyObject *empty = unicode_get_empty();
if (empty != NULL) {
Py_INCREF(empty);
return empty;
}
return unicode_new_empty();
}

PyObject *obj;
Expand Down Expand Up @@ -2001,8 +1981,7 @@ unicode_dealloc(PyObject *unicode)
static int
unicode_is_singleton(PyObject *unicode)
{
struct _Py_unicode_state *state = get_unicode_state();
if (unicode == state->empty) {
if (unicode == unicode_get_empty()) {
return 1;
}
#ifdef LATIN1_SINGLETONS
Expand Down Expand Up @@ -2059,8 +2038,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)

if (length == 0) {
PyObject *empty = unicode_new_empty();
if (!empty)
return -1;
Py_SETREF(*p_unicode, empty);
return 0;
}
Expand Down Expand Up @@ -10868,10 +10845,7 @@ replace(PyObject *self, PyObject *str1,
}
new_size = slen + n * (len2 - len1);
if (new_size == 0) {
PyObject *empty = unicode_new_empty();
if (!empty)
goto error;
u = empty;
u = unicode_new_empty();
goto done;
}
if (new_size > (PY_SSIZE_T_MAX / rkind)) {
Expand Down Expand Up @@ -13293,13 +13267,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
len2 = PyUnicode_GET_LENGTH(sep_obj);
if (kind1 < kind2 || len1 < len2) {
PyObject *empty = unicode_get_empty(); // Borrowed reference
if (!empty) {
out = NULL;
}
else {
out = PyTuple_Pack(3, str_obj, empty, empty);
}
return out;
return PyTuple_Pack(3, str_obj, empty, empty);
}
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
Expand Down Expand Up @@ -13351,13 +13319,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
len2 = PyUnicode_GET_LENGTH(sep_obj);
if (kind1 < kind2 || len1 < len2) {
PyObject *empty = unicode_get_empty(); // Borrowed reference
if (!empty) {
out = NULL;
}
else {
out = PyTuple_Pack(3, empty, empty, str_obj);
}
return out;
return PyTuple_Pack(3, empty, empty, str_obj);
}
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
Expand Down Expand Up @@ -15589,12 +15551,20 @@ _PyUnicode_Init(PyThreadState *tstate)
0x2029, /* PARAGRAPH SEPARATOR */
};

/* Init the implementation */
PyObject *empty = unicode_new_empty();
if (!empty) {
// Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
// optimized to always use state->empty without having to check if it is
// NULL or not.
PyObject *empty = PyUnicode_New(1, 0);
if (empty == NULL) {
return _PyStatus_NO_MEMORY();
}
Py_DECREF(empty);
PyUnicode_1BYTE_DATA(empty)[0] = 0;
_PyUnicode_LENGTH(empty) = 0;
assert(_PyUnicode_CheckConsistency(empty, 1));

struct _Py_unicode_state *state = &tstate->interp->unicode;
assert(state->empty == NULL);
state->empty = empty;

if (_Py_IsMainInterpreter(tstate)) {
/* initialize the linebreak bloom filter */
Expand Down