Skip to content

Commit 45ec02a

Browse files
committed
SF patch 576101, by Oren Tirosh: alternative implementation of
interning. I modified Oren's patch significantly, but the basic idea and most of the implementation are unchanged. Interned strings created with PyString_InternInPlace() are now mortal, and you must keep a reference to the resulting string around; use the new function PyString_InternImmortal() to create immortal interned strings.
1 parent d8dbf84 commit 45ec02a

File tree

7 files changed

+171
-106
lines changed

7 files changed

+171
-106
lines changed

Doc/lib/libfuncs.tex

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -518,8 +518,10 @@ \section{Built-in Functions \label{built-in-funcs}}
518518
be done by a pointer compare instead of a string compare. Normally,
519519
the names used in Python programs are automatically interned, and
520520
the dictionaries used to hold module, class or instance attributes
521-
have interned keys. Interned strings are immortal (never get
522-
garbage collected).
521+
have interned keys. \versionchanged[Interned strings are not
522+
immortal (like they used to be in Python 2.2 and before);
523+
you must keep a reference to the return value of \function{intern()}
524+
around to benefit from it]{2.3}
523525
\end{funcdesc}
524526

525527
\begin{funcdesc}{isinstance}{object, classinfo}

Include/modsupport.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ PyAPI_FUNC(int) PyModule_AddObject(PyObject *, char *, PyObject *);
2323
PyAPI_FUNC(int) PyModule_AddIntConstant(PyObject *, char *, long);
2424
PyAPI_FUNC(int) PyModule_AddStringConstant(PyObject *, char *, char *);
2525

26-
#define PYTHON_API_VERSION 1011
27-
#define PYTHON_API_STRING "1011"
26+
#define PYTHON_API_VERSION 1012
27+
#define PYTHON_API_STRING "1012"
2828
/* The API version is maintained (independently from the Python version)
2929
so we can detect mismatches between the interpreter and dynamically
3030
loaded modules. These are diagnosed by an error message but
@@ -38,6 +38,9 @@ PyAPI_FUNC(int) PyModule_AddStringConstant(PyObject *, char *, char *);
3838
Please add a line or two to the top of this log for each API
3939
version change:
4040
41+
19-Aug-2002 GvR 1012 Changes to string object struct for
42+
interning changes, saving 3 bytes.
43+
4144
17-Jul-2001 GvR 1011 Descr-branch, just to be on the safe side
4245
4346
25-Jan-2001 FLD 1010 Parameters added to PyCode_New() and

Include/stringobject.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ functions should be applied to nil objects.
2525
*/
2626

2727
/* Caching the hash (ob_shash) saves recalculation of a string's hash value.
28-
Interning strings (ob_sinterned) tries to ensure that only one string
28+
Interning strings (ob_sstate) tries to ensure that only one string
2929
object with a given value exists, so equality tests can be one pointer
3030
comparison. This is generally restricted to strings that "look like"
3131
Python identifiers, although the intern() builtin can be used to force
@@ -35,10 +35,14 @@ functions should be applied to nil objects.
3535
typedef struct {
3636
PyObject_VAR_HEAD
3737
long ob_shash;
38-
PyObject *ob_sinterned;
38+
int ob_sstate;
3939
char ob_sval[1];
4040
} PyStringObject;
4141

42+
#define SSTATE_NOT_INTERNED 0
43+
#define SSTATE_INTERNED_MORTAL 1
44+
#define SSTATE_INTERNED_IMMORTAL 2
45+
4246
PyAPI_DATA(PyTypeObject) PyBaseString_Type;
4347
PyAPI_DATA(PyTypeObject) PyString_Type;
4448

@@ -66,9 +70,13 @@ extern DL_IMPORT(PyObject *) PyString_DecodeEscape(const char *, int,
6670
const char *);
6771

6872
PyAPI_FUNC(void) PyString_InternInPlace(PyObject **);
73+
PyAPI_FUNC(void) PyString_InternImmortal(PyObject **);
6974
PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *);
7075
PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void);
7176

77+
/* Use only if you know it's a string */
78+
#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate)
79+
7280
/* Macro, trading safety for speed */
7381
#define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval)
7482
#define PyString_GET_SIZE(op) (((PyStringObject *)(op))->ob_size)

Misc/NEWS

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ Type/class unification and new-style classes
5757

5858
Core and builtins
5959

60+
- A subtle change to the semantics of the built-in function intern():
61+
interned strings are no longer immortal. You must keep a reference
62+
to the return value of intern() around to get the benefit.
63+
6064
- Use of 'None' as a variable, argument or attribute name now
6165
issues a SyntaxWarning. In the future, None may become a keyword.
6266

@@ -514,6 +518,19 @@ Build
514518

515519
C API
516520

521+
- The string object's layout has changed: the pointer member
522+
ob_sinterned has been replaced by an int member ob_sstate. On some
523+
platforms (e.g. most 64-bit systems) this may change the offset of
524+
the ob_sval member, so as a precaution the API_VERSION has been
525+
incremented. The apparently unused feature of "indirect interned
526+
strings", supported by the ob_sinterned member, is gone. Interned
527+
strings are now usually mortal; there's a new API,
528+
PyString_InternImmortal() that creates immortal interned strings.
529+
(The ob_sstate member can only take three values; however, while
530+
making it a char saves a few bytes per string object on average,
531+
it also slowed things down a bit because ob_sval was no longer
532+
aligned.)
533+
517534
- The Py_InitModule*() functions now accept NULL for the 'methods'
518535
argument. Modules without global functions are becoming more common
519536
now that factories can be types rather than functions.

Objects/classobject.c

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2300,37 +2300,38 @@ instancemethod_traverse(PyMethodObject *im, visitproc visit, void *arg)
23002300
return 0;
23012301
}
23022302

2303-
static char *
2304-
getclassname(PyObject *class)
2303+
static void
2304+
getclassname(PyObject *class, char *buf, int bufsize)
23052305
{
23062306
PyObject *name;
23072307

2308+
assert(bufsize > 1);
2309+
strcpy(buf, "?"); /* Default outcome */
23082310
if (class == NULL)
2309-
name = NULL;
2310-
else
2311-
name = PyObject_GetAttrString(class, "__name__");
2311+
return;
2312+
name = PyObject_GetAttrString(class, "__name__");
23122313
if (name == NULL) {
23132314
/* This function cannot return an exception */
23142315
PyErr_Clear();
2315-
return "?";
2316+
return;
23162317
}
2317-
if (!PyString_Check(name)) {
2318-
Py_DECREF(name);
2319-
return "?";
2318+
if (PyString_Check(name)) {
2319+
strncpy(buf, PyString_AS_STRING(name), bufsize);
2320+
buf[bufsize-1] = '\0';
23202321
}
2321-
PyString_InternInPlace(&name);
23222322
Py_DECREF(name);
2323-
return PyString_AS_STRING(name);
23242323
}
23252324

2326-
static char *
2327-
getinstclassname(PyObject *inst)
2325+
static void
2326+
getinstclassname(PyObject *inst, char *buf, int bufsize)
23282327
{
23292328
PyObject *class;
2330-
char *name;
23312329

2332-
if (inst == NULL)
2333-
return "nothing";
2330+
if (inst == NULL) {
2331+
assert(bufsize > strlen("nothing"));
2332+
strcpy(buf, "nothing");
2333+
return;
2334+
}
23342335

23352336
class = PyObject_GetAttrString(inst, "__class__");
23362337
if (class == NULL) {
@@ -2339,9 +2340,8 @@ getinstclassname(PyObject *inst)
23392340
class = (PyObject *)(inst->ob_type);
23402341
Py_INCREF(class);
23412342
}
2342-
name = getclassname(class);
2343+
getclassname(class, buf, bufsize);
23432344
Py_XDECREF(class);
2344-
return name;
23452345
}
23462346

23472347
static PyObject *
@@ -2366,14 +2366,18 @@ instancemethod_call(PyObject *func, PyObject *arg, PyObject *kw)
23662366
return NULL;
23672367
}
23682368
if (!ok) {
2369+
char clsbuf[256];
2370+
char instbuf[256];
2371+
getclassname(class, clsbuf, sizeof(clsbuf));
2372+
getinstclassname(self, instbuf, sizeof(instbuf));
23692373
PyErr_Format(PyExc_TypeError,
23702374
"unbound method %s%s must be called with "
23712375
"%s instance as first argument "
23722376
"(got %s%s instead)",
23732377
PyEval_GetFuncName(func),
23742378
PyEval_GetFuncDesc(func),
2375-
getclassname(class),
2376-
getinstclassname(self),
2379+
clsbuf,
2380+
instbuf,
23772381
self == NULL ? "" : " instance");
23782382
return NULL;
23792383
}

Objects/dictobject.c

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -511,15 +511,9 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
511511
}
512512
mp = (dictobject *)op;
513513
if (PyString_CheckExact(key)) {
514-
if (((PyStringObject *)key)->ob_sinterned != NULL) {
515-
key = ((PyStringObject *)key)->ob_sinterned;
516-
hash = ((PyStringObject *)key)->ob_shash;
517-
}
518-
else {
519-
hash = ((PyStringObject *)key)->ob_shash;
520-
if (hash == -1)
521-
hash = PyObject_Hash(key);
522-
}
514+
hash = ((PyStringObject *)key)->ob_shash;
515+
if (hash == -1)
516+
hash = PyObject_Hash(key);
523517
}
524518
else {
525519
hash = PyObject_Hash(key);

0 commit comments

Comments
 (0)