Skip to content

Commit

Permalink
[3.13] gh-113993: Allow interned strings to be mortal, and fix relate…
Browse files Browse the repository at this point in the history
…d issues (GH-120520) (GH-120945)

* Add an InternalDocs file describing how interning should work and how to use it.

* Add internal functions to *explicitly* request what kind of interning is done:
  - `_PyUnicode_InternMortal`
  - `_PyUnicode_InternImmortal`
  - `_PyUnicode_InternStatic`

* Switch uses of `PyUnicode_InternInPlace` to those.

* Disallow using `_Py_SetImmortal` on strings directly.
  You should use `_PyUnicode_InternImmortal` instead:
  - Strings should be interned before immortalization, otherwise you're possibly
    interning a immortalizing copy.
  - `_Py_SetImmortal` doesn't handle the `SSTATE_INTERNED_MORTAL` to
    `SSTATE_INTERNED_IMMORTAL` update, and those flags can't be changed in
    backports, as they are now part of public API and version-specific ABI.

* Add private `_only_immortal` argument for `sys.getunicodeinternedsize`, used in refleak test machinery.

* Make sure the statically allocated string singletons are unique. This means these sets are now disjoint:
  - `_Py_ID`
  - `_Py_STR` (including the empty string)
  - one-character latin-1 singletons

  Now, when you intern a singleton, that exact singleton will be interned.

* Add a `_Py_LATIN1_CHR` macro, use it instead of `_Py_ID`/`_Py_STR` for one-character latin-1 singletons everywhere (including Clinic).

* Intern `_Py_STR` singletons at startup.

* For free-threaded builds, intern `_Py_LATIN1_CHR` singletons at startup.

* Beef up the tests. Cover internal details (marked with `@cpython_only`).

* Add lots of assertions

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
  • Loading branch information
encukou and ericsnowcurrently authored Jun 24, 2024
1 parent 447e07a commit 9769b7a
Show file tree
Hide file tree
Showing 42 changed files with 2,456 additions and 1,136 deletions.
20 changes: 1 addition & 19 deletions Include/internal/pycore_global_objects_fini_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 5 additions & 19 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,16 @@ struct _Py_global_strings {
STRUCT_FOR_STR(anon_setcomp, "<setcomp>")
STRUCT_FOR_STR(anon_string, "<string>")
STRUCT_FOR_STR(anon_unknown, "<unknown>")
STRUCT_FOR_STR(close_br, "}")
STRUCT_FOR_STR(dbl_close_br, "}}")
STRUCT_FOR_STR(dbl_open_br, "{{")
STRUCT_FOR_STR(dbl_percent, "%%")
STRUCT_FOR_STR(defaults, ".defaults")
STRUCT_FOR_STR(dot, ".")
STRUCT_FOR_STR(dot_locals, ".<locals>")
STRUCT_FOR_STR(empty, "")
STRUCT_FOR_STR(generic_base, ".generic_base")
STRUCT_FOR_STR(json_decoder, "json.decoder")
STRUCT_FOR_STR(kwdefaults, ".kwdefaults")
STRUCT_FOR_STR(list_err, "list index out of range")
STRUCT_FOR_STR(newline, "\n")
STRUCT_FOR_STR(open_br, "{")
STRUCT_FOR_STR(percent, "%")
STRUCT_FOR_STR(type_params, ".type_params")
STRUCT_FOR_STR(utf_8, "utf-8")
} literals;
Expand All @@ -66,7 +61,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(TextIOWrapper)
STRUCT_FOR_ID(True)
STRUCT_FOR_ID(WarningMessage)
STRUCT_FOR_ID(_)
STRUCT_FOR_ID(_WindowsConsoleIO)
STRUCT_FOR_ID(__IOBase_closed)
STRUCT_FOR_ID(__abc_tpflags__)
Expand Down Expand Up @@ -260,6 +254,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(_lock_unlock_module)
STRUCT_FOR_ID(_loop)
STRUCT_FOR_ID(_needs_com_addref_)
STRUCT_FOR_ID(_only_immortal)
STRUCT_FOR_ID(_pack_)
STRUCT_FOR_ID(_restype_)
STRUCT_FOR_ID(_showwarnmsg)
Expand All @@ -272,7 +267,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(_uninitialized_submodules)
STRUCT_FOR_ID(_warn_unawaited_coroutine)
STRUCT_FOR_ID(_xoptions)
STRUCT_FOR_ID(a)
STRUCT_FOR_ID(abs_tol)
STRUCT_FOR_ID(access)
STRUCT_FOR_ID(aclose)
Expand All @@ -296,7 +290,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(attribute)
STRUCT_FOR_ID(authorizer_callback)
STRUCT_FOR_ID(autocommit)
STRUCT_FOR_ID(b)
STRUCT_FOR_ID(backtick)
STRUCT_FOR_ID(base)
STRUCT_FOR_ID(before)
Expand All @@ -314,7 +307,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(byteorder)
STRUCT_FOR_ID(bytes)
STRUCT_FOR_ID(bytes_per_sep)
STRUCT_FOR_ID(c)
STRUCT_FOR_ID(c_call)
STRUCT_FOR_ID(c_exception)
STRUCT_FOR_ID(c_return)
Expand Down Expand Up @@ -370,7 +362,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(count)
STRUCT_FOR_ID(covariant)
STRUCT_FOR_ID(cwd)
STRUCT_FOR_ID(d)
STRUCT_FOR_ID(data)
STRUCT_FOR_ID(database)
STRUCT_FOR_ID(day)
Expand Down Expand Up @@ -399,7 +390,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(dont_inherit)
STRUCT_FOR_ID(dst)
STRUCT_FOR_ID(dst_dir_fd)
STRUCT_FOR_ID(e)
STRUCT_FOR_ID(eager_start)
STRUCT_FOR_ID(effective_ids)
STRUCT_FOR_ID(element_factory)
Expand All @@ -423,7 +413,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(exp)
STRUCT_FOR_ID(extend)
STRUCT_FOR_ID(extra_tokens)
STRUCT_FOR_ID(f)
STRUCT_FOR_ID(facility)
STRUCT_FOR_ID(factory)
STRUCT_FOR_ID(false)
Expand Down Expand Up @@ -456,7 +445,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(fset)
STRUCT_FOR_ID(func)
STRUCT_FOR_ID(future)
STRUCT_FOR_ID(g)
STRUCT_FOR_ID(generation)
STRUCT_FOR_ID(genexpr)
STRUCT_FOR_ID(get)
Expand All @@ -470,7 +458,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(globals)
STRUCT_FOR_ID(groupindex)
STRUCT_FOR_ID(groups)
STRUCT_FOR_ID(h)
STRUCT_FOR_ID(handle)
STRUCT_FOR_ID(handle_seq)
STRUCT_FOR_ID(has_location)
Expand Down Expand Up @@ -581,7 +568,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(msg)
STRUCT_FOR_ID(mutex)
STRUCT_FOR_ID(mycmp)
STRUCT_FOR_ID(n)
STRUCT_FOR_ID(n_arg)
STRUCT_FOR_ID(n_fields)
STRUCT_FOR_ID(n_sequence_fields)
Expand Down Expand Up @@ -627,7 +613,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(outgoing)
STRUCT_FOR_ID(overlapped)
STRUCT_FOR_ID(owner)
STRUCT_FOR_ID(p)
STRUCT_FOR_ID(pages)
STRUCT_FOR_ID(parent)
STRUCT_FOR_ID(password)
Expand Down Expand Up @@ -655,7 +640,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(ps2)
STRUCT_FOR_ID(query)
STRUCT_FOR_ID(quotetabs)
STRUCT_FOR_ID(r)
STRUCT_FOR_ID(raw)
STRUCT_FOR_ID(read)
STRUCT_FOR_ID(read1)
Expand All @@ -679,7 +663,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(return)
STRUCT_FOR_ID(reverse)
STRUCT_FOR_ID(reversed)
STRUCT_FOR_ID(s)
STRUCT_FOR_ID(salt)
STRUCT_FOR_ID(sched_priority)
STRUCT_FOR_ID(scheduler)
Expand Down Expand Up @@ -786,7 +769,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(writable)
STRUCT_FOR_ID(write)
STRUCT_FOR_ID(write_through)
STRUCT_FOR_ID(x)
STRUCT_FOR_ID(year)
STRUCT_FOR_ID(zdict)
} identifiers;
Expand All @@ -809,6 +791,10 @@ struct _Py_global_strings {
(_Py_SINGLETON(strings.identifiers._py_ ## NAME._ascii.ob_base))
#define _Py_STR(NAME) \
(_Py_SINGLETON(strings.literals._py_ ## NAME._ascii.ob_base))
#define _Py_LATIN1_CHR(CH) \
((CH) < 128 \
? (PyObject*)&_Py_SINGLETON(strings).ascii[(CH)] \
: (PyObject*)&_Py_SINGLETON(strings).latin1[(CH) - 128])

/* _Py_DECLARE_STR() should precede all uses of _Py_STR() in a function.
Expand Down
Loading

0 comments on commit 9769b7a

Please sign in to comment.