From f8196614db84a4ae577161f8aff8157c84fef777 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Mon, 27 May 2024 16:21:18 +0200
Subject: [PATCH] gh-119609: Add PyUnicode_Export() function

Add PyUnicode_Export(), PyUnicode_GetBufferFormat() and
PyUnicode_Import() functions to the limited C API.
---
 Doc/c-api/unicode.rst                         |  65 +++++
 Doc/data/stable_abi.dat                       |   3 +
 Doc/whatsnew/3.14.rst                         |   4 +
 Include/unicodeobject.h                       |  18 ++
 Lib/test/test_capi/test_unicode.py            | 183 ++++++++++++++-
 Lib/test/test_stable_abi_ctypes.py            |   3 +
 ...-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst |   3 +
 Misc/stable_abi.toml                          |  16 ++
 Modules/_testlimitedcapi/unicode.c            |  70 ++++++
 Objects/unicodeobject.c                       | 222 +++++++++++++++++-
 PC/python3dll.c                               |   3 +
 11 files changed, 586 insertions(+), 4 deletions(-)
 create mode 100644 Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 958fafd47ac81b5..603905d21555e5b 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -341,6 +341,71 @@ APIs:
    .. versionadded:: 3.3
 
 
+.. c:function:: int PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
+
+   Export the contents of the *unicode* string in one of the formats
+   requested by *requested_formats*.
+
+   * On success, fill *view*, and return ``0``.
+   * On error, set an exception and return ``-1``.
+
+   The export must be released by :c:func:`PyBuffer_Release`.
+   The contents of the buffer are valid until they are released.
+
+   The buffer is read-only and must not be modified.
+
+   *unicode* and *view* must not be NULL.
+
+   Available formats:
+
+   .. c:namespace:: NULL
+
+   ===================================  ========  ===========================
+   Constant Identifier                  Value     Description
+   ===================================  ========  ===========================
+   .. c:macro:: PyUnicode_FORMAT_ASCII  ``0x01``  ASCII string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS1   ``0x02``  UCS-1 string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS2   ``0x04``  UCS-2 string (``Py_UCS2*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS4   ``0x08``  UCS-4 string (``Py_UCS4*``)
+   .. c:macro:: PyUnicode_FORMAT_UTF8   ``0x10``  UTF-8 string (``char*``)
+   ===================================  ========  ===========================
+
+   *requested_formats* can be a single format or a bitwise combination of the
+   formats in the table above. On success, exactly one of the requested formats
+   is used; call :c:func:`PyUnicode_GetBufferFormat` to find out which one.
+
+   Note that future versions of Python may introduce additional formats.
+
+   .. versionadded:: 3.14
+
+
+.. c:function:: int PyUnicode_GetBufferFormat(const Py_buffer *view, uint32_t *format)
+
+   Get the format of the buffer *view*.
+
+   * On success, set *\*format* to the corresponding ``PyUnicode_FORMAT_*``
+     value and return ``0``.
+   * On error, set an exception and return ``-1``.
+
+   *view* must be a buffer filled by :c:func:`PyUnicode_Export`.
+
+   .. versionadded:: 3.14
+
+
+.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format)
+
+   Create a string object from a buffer in an “export format”.
+
+   * Return a reference to a new string object on success.
+   * Set an exception and return ``NULL`` on error.
+
+   *data* must not be NULL. *nbytes* must be positive or zero.
+
+   See :c:func:`PyUnicode_Export` for the available formats.
+
+   .. versionadded:: 3.14
+
+
 .. c:function:: PyObject* PyUnicode_FromKindAndData(int kind, const void *buffer, \
                                                     Py_ssize_t size)
 
diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index 7eeee270bb7f322..a6745986c2025e6 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -784,6 +784,7 @@ func,PyUnicode_EncodeFSDefault,3.2,,
 func,PyUnicode_EncodeLocale,3.7,,
 func,PyUnicode_EqualToUTF8,3.13,,
 func,PyUnicode_EqualToUTF8AndSize,3.13,,
+func,PyUnicode_Export,3.14,,
 func,PyUnicode_FSConverter,3.2,,
 func,PyUnicode_FSDecoder,3.2,,
 func,PyUnicode_Find,3.2,,
@@ -797,8 +798,10 @@ func,PyUnicode_FromOrdinal,3.2,,
 func,PyUnicode_FromString,3.2,,
 func,PyUnicode_FromStringAndSize,3.2,,
 func,PyUnicode_FromWideChar,3.2,,
+func,PyUnicode_GetBufferFormat,3.14,,
 func,PyUnicode_GetDefaultEncoding,3.2,,
 func,PyUnicode_GetLength,3.7,,
+func,PyUnicode_Import,3.14,,
 func,PyUnicode_InternFromString,3.2,,
 func,PyUnicode_InternInPlace,3.2,,
 func,PyUnicode_IsIdentifier,3.2,,
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index e1bd52370d776c4..1d5e2a10b1b6dca 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -529,6 +529,10 @@ New Features
 
   (Contributed by Victor Stinner in :gh:`107954`.)
 
+* Add :c:func:`PyUnicode_Export`, :c:func:`PyUnicode_GetBufferFormat`,
+  and :c:func:`PyUnicode_Import` functions to export and import strings.
+  (Contributed by Victor Stinner in :gh:`119609`.)
+
 
 Porting to Python 3.14
 ----------------------
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index dee00715b3c51d5..75d41a90ae65d7d 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -248,6 +248,24 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
     const char *u              /* UTF-8 encoded string */
     );
 
+/* Export formats: PyUnicode_Export() takes a bitwise OR of these flags,
+   PyUnicode_GetBufferFormat() and PyUnicode_Import() use a single one. */
+#define PyUnicode_FORMAT_ASCII 0x01  // Py_UCS1* (ASCII string)
+#define PyUnicode_FORMAT_UCS1 0x02   // Py_UCS1*
+#define PyUnicode_FORMAT_UCS2 0x04   // Py_UCS2*
+#define PyUnicode_FORMAT_UCS4 0x08   // Py_UCS4*
+#define PyUnicode_FORMAT_UTF8 0x10   // char*
+
+// Export a str; on success, release the view with PyBuffer_Release().
+PyAPI_FUNC(int) PyUnicode_Export(
+    PyObject *unicode, uint32_t requested_formats, Py_buffer *view);
+// Get the PyUnicode_FORMAT_* flag of a view filled by PyUnicode_Export().
+PyAPI_FUNC(int) PyUnicode_GetBufferFormat(
+    const Py_buffer *view, uint32_t *format);
+// Create a str from nbytes bytes of data in the given export format.
+PyAPI_FUNC(PyObject*) PyUnicode_Import(
+    const void *data, Py_ssize_t nbytes, uint32_t format);
+
 /* --- wchar_t support for platforms which support it --------------------- */
 
 #ifdef HAVE_WCHAR_H
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index e6f854272149587..19397cb30715ddb 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1,5 +1,6 @@
-import unittest
+import struct
 import sys
+import unittest
 from test import support
 from test.support import import_helper
 
@@ -28,6 +29,14 @@ class Str(str):
     pass
 
 
+PyUnicode_FORMAT_ASCII = 0x01
+PyUnicode_FORMAT_UCS1 = 0x02
+PyUnicode_FORMAT_UCS2 = 0x04
+PyUnicode_FORMAT_UCS4 = 0x08
+PyUnicode_FORMAT_UTF8 = 0x10
+# Invalid format: not one of the PyUnicode_FORMAT_* constants above
+PyUnicode_FORMAT_INVALID = 0x20
+
 class CAPITest(unittest.TestCase):
 
     @support.cpython_only
@@ -1721,6 +1730,142 @@ def test_pep393_utf8_caching_bug(self):
                 # Check that the second call returns the same result
                 self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
 
+    def test_unicode_export(self):
+        # Test PyUnicode_Export() and PyUnicode_GetBufferFormat()
+        unicode_export = _testlimitedcapi.unicode_export
+        if sys.byteorder == 'little':
+            ucs2_enc = 'utf-16le'
+            ucs4_enc = 'utf-32le'
+        else:
+            ucs2_enc = 'utf-16be'
+            ucs4_enc = 'utf-32be'
+
+        # export to the native format
+        formats = (PyUnicode_FORMAT_ASCII
+                   | PyUnicode_FORMAT_UCS1
+                   | PyUnicode_FORMAT_UCS2
+                   | PyUnicode_FORMAT_UCS4)
+        BUFFER_UCS1 = 'B'  # expected buffer format codes
+        BUFFER_UCS2 = 'H'
+        if struct.calcsize('I') == 4:  # pick the code of a 4-byte integer
+            BUFFER_UCS4 = 'I'
+        elif struct.calcsize('L') == 4:
+            BUFFER_UCS4 = 'L'
+        else:
+            self.fail("unable to get BUFFER_UCS4 ")
+
+        def check_ucs1(text, formats):
+            if formats == PyUnicode_FORMAT_UCS1:  # ASCII is not requested
+                export_format = PyUnicode_FORMAT_UCS1
+            elif text.isascii():
+                export_format = PyUnicode_FORMAT_ASCII
+            else:
+                export_format = PyUnicode_FORMAT_UCS1
+            self.assertEqual(unicode_export(text, formats),
+                             (text.encode('latin1'), export_format, 1, BUFFER_UCS1))
+
+        def check_ucs2(text, formats):
+            self.assertEqual(unicode_export(text, formats),
+                             (text.encode(ucs2_enc),
+                              PyUnicode_FORMAT_UCS2, 2, BUFFER_UCS2))
+
+        def check_ucs4(text, formats):
+            self.assertEqual(unicode_export(text, formats),
+                             (text.encode(ucs4_enc),
+                              PyUnicode_FORMAT_UCS4, 4, BUFFER_UCS4))
+
+        def check_utf8(text):
+            self.assertEqual(unicode_export(text, PyUnicode_FORMAT_UTF8),
+                             (text.encode('utf8'),
+                              PyUnicode_FORMAT_UTF8, 1, 'B'))
+
+        check_ucs1("abc", formats)
+        check_ucs1("latin1:\xe9", formats)
+        check_ucs2('ucs2:\u20ac', formats)
+        check_ucs4('ucs4:\U0010ffff', formats)
+
+        # export ASCII as UCS1
+        check_ucs1("abc", PyUnicode_FORMAT_UCS1)
+
+        # export ASCII and UCS1 to UCS2
+        check_ucs2("abc", PyUnicode_FORMAT_UCS2)
+        check_ucs2("latin1:\xe9", PyUnicode_FORMAT_UCS2)
+
+        # always export to UCS4
+        check_ucs4("abc", PyUnicode_FORMAT_UCS4)
+        check_ucs4("latin1:\xe9", PyUnicode_FORMAT_UCS4)
+        check_ucs4('ucs2:\u20ac', PyUnicode_FORMAT_UCS4)
+        check_ucs4('ucs4:\U0010ffff', PyUnicode_FORMAT_UCS4)
+
+        # always export to UTF8
+        check_utf8("abc")
+        check_utf8("latin1:\xe9")
+        check_utf8('ucs2:\u20ac')
+        check_utf8('ucs4:\U0010ffff')
+
+        # No supported format or invalid format
+        for formats in (0, PyUnicode_FORMAT_INVALID):
+            err_msg = "unable to find a matching export format"
+            with self.subTest(formats=formats):
+                with self.assertRaisesRegex(ValueError, err_msg):
+                    unicode_export('abc', formats)
+
+    def test_unicode_import(self):
+        # Test PyUnicode_Import()
+        unicode_import = _testlimitedcapi.unicode_import
+        if sys.byteorder == 'little':  # UCS2/UCS4 data uses the native byte order
+            ucs2_enc = 'utf-16le'
+            ucs4_enc = 'utf-32le'
+        else:
+            ucs2_enc = 'utf-16be'
+            ucs4_enc = 'utf-32be'
+
+        self.assertEqual(unicode_import(b'abc', PyUnicode_FORMAT_ASCII),
+                         "abc")
+        self.assertEqual(unicode_import(b'latin1:\xe9', PyUnicode_FORMAT_UCS1),
+                         "latin1:\xe9")
+
+        self.assertEqual(unicode_import('ucs2:\u20ac'.encode(ucs2_enc),
+                                          PyUnicode_FORMAT_UCS2),
+                         'ucs2:\u20ac')
+
+        self.assertEqual(unicode_import('ucs4:\U0010ffff'.encode(ucs4_enc),
+                                          PyUnicode_FORMAT_UCS4),
+                         'ucs4:\U0010ffff')
+
+        text = "abc\xe9\U0010ffff"
+        self.assertEqual(unicode_import(text.encode('utf8'),
+                                          PyUnicode_FORMAT_UTF8),
+                         text)
+
+        # Empty string: accepted in every format
+        for native_format in (
+            PyUnicode_FORMAT_ASCII,
+            PyUnicode_FORMAT_UCS1,
+            PyUnicode_FORMAT_UCS2,
+            PyUnicode_FORMAT_UCS4,
+            PyUnicode_FORMAT_UTF8,
+        ):
+            with self.subTest(native_format=native_format):
+                self.assertEqual(unicode_import(b'', native_format),
+                                 '')
+
+        # Invalid format
+        with self.assertRaises(ValueError):
+            unicode_import(b'', PyUnicode_FORMAT_INVALID)
+
+        # Invalid size: nbytes is not a multiple of the character size
+        ucs2 = 'ucs2:\u20ac'.encode(ucs2_enc)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs2[:-1], PyUnicode_FORMAT_UCS2)
+        ucs4 = 'ucs4:\U0010ffff'.encode(ucs4_enc)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs4[:-1], PyUnicode_FORMAT_UCS4)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs4[:-2], PyUnicode_FORMAT_UCS4)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs4[:-3], PyUnicode_FORMAT_UCS4)
+
 
 class PyUnicodeWriterTest(unittest.TestCase):
     def create_writer(self, size):
@@ -1903,6 +2048,38 @@ def test_recover_error(self):
 
         self.assertEqual(writer.finish(), 'Hello World.')
 
-
-if __name__ == "__main__":
+    def test_unicode_export_import_roundtrip(self):
+        unicode_export = _testlimitedcapi.unicode_export
+        unicode_import = _testlimitedcapi.unicode_import
+
+        ASCII = PyUnicode_FORMAT_ASCII
+        UCS1 = PyUnicode_FORMAT_UCS1
+        UCS2 = PyUnicode_FORMAT_UCS2
+        UCS4 = PyUnicode_FORMAT_UCS4
+        UTF8 = PyUnicode_FORMAT_UTF8
+        ALL = (ASCII | UCS1 | UCS2 | UCS4 | UTF8)
+
+        for string, allowed_formats in (  # (text, formats expected to export it)
+            ('', {ASCII, UCS1, UCS2, UCS4, UTF8}),
+            ('ascii', {ASCII, UCS1, UCS2, UCS4, UTF8}),
+            ('latin1:\xe9', {UCS1, UCS2, UCS4, UTF8}),
+            ('ucs2:\u20ac', {UCS2, UCS4, UTF8}),
+            ('ucs4:\U0001f638', {UCS4, UTF8}),
+        ):
+            for format in ASCII, UCS1, UCS2, UCS4, UTF8:  # one format at a time
+                with self.subTest(string=string, format=format):
+                    if format not in allowed_formats:
+                        with self.assertRaises(ValueError):
+                            unicode_export(string, format)
+                    else:
+                        buf, buf_fmt, item_size, view_fmt = unicode_export(string, format)
+                        restored = unicode_import(buf, buf_fmt)
+                        self.assertEqual(restored, string)
+
+            buf, buf_fmt, item_size, view_fmt = unicode_export(string, ALL)  # any format
+            restored = unicode_import(buf, buf_fmt)
+            self.assertEqual(restored, string)
+
+
+if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
index 4bca33b7451f80b..b496b43d4ef6cdb 100644
--- a/Lib/test/test_stable_abi_ctypes.py
+++ b/Lib/test/test_stable_abi_ctypes.py
@@ -806,6 +806,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_EncodeLocale",
     "PyUnicode_EqualToUTF8",
     "PyUnicode_EqualToUTF8AndSize",
+    "PyUnicode_Export",
     "PyUnicode_FSConverter",
     "PyUnicode_FSDecoder",
     "PyUnicode_Find",
@@ -819,9 +820,11 @@ def test_windows_feature_macros(self):
     "PyUnicode_FromString",
     "PyUnicode_FromStringAndSize",
     "PyUnicode_FromWideChar",
+    "PyUnicode_GetBufferFormat",
     "PyUnicode_GetDefaultEncoding",
     "PyUnicode_GetLength",
     "PyUnicode_GetSize",
+    "PyUnicode_Import",
     "PyUnicode_InternFromString",
     "PyUnicode_InternImmortal",
     "PyUnicode_InternInPlace",
diff --git a/Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst b/Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst
new file mode 100644
index 000000000000000..6d75f0c192bc858
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2024-09-05-16-57-51.gh-issue-119609.5EZ-kg.rst
@@ -0,0 +1,3 @@
+Add :c:func:`PyUnicode_Export`, :c:func:`PyUnicode_GetBufferFormat`, and
+:c:func:`PyUnicode_Import` functions to export and import strings. Patch by
+Victor Stinner.
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 8bf638c473c712f..7fb8971326a0649 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2526,3 +2526,19 @@
     added = '3.14'
 [function.PyLong_AsUInt64]
     added = '3.14'
+[const.PyUnicode_FORMAT_ASCII]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS1]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS2]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS4]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UTF8]
+    added = '3.14'
+[function.PyUnicode_Export]
+    added = '3.14'
+[function.PyUnicode_GetBufferFormat]
+    added = '3.14'
+[function.PyUnicode_Import]
+    added = '3.14'
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 2b70d09108a3335..2f21d0a338fa938 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1837,6 +1837,74 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
 #undef CHECK_FORMAT_0
 }
 
+
+// Test PyUnicode_Export() and PyUnicode_GetBufferFormat().
+// Return (bytes, format, itemsize, buffer format string).
+static PyObject*
+unicode_export(PyObject *self, PyObject *args)
+{
+    PyObject *obj;
+    unsigned int requested_formats;
+    if (!PyArg_ParseTuple(args, "OI", &obj, &requested_formats)) {
+        return NULL;
+    }
+
+    Py_buffer view;
+    if (PyUnicode_Export(obj, requested_formats, &view) < 0) {
+        return NULL;
+    }
+    uint32_t format;
+    if (PyUnicode_GetBufferFormat(&view, &format) < 0) {
+        // The export succeeded: release it before reporting the error
+        PyBuffer_Release(&view);
+        return NULL;
+    }
+
+    // Make sure that the exported string ends with a NUL character
+    char *data = view.buf;
+    Py_ssize_t nbytes = view.len * view.itemsize;
+    switch (format)
+    {
+    case PyUnicode_FORMAT_ASCII:
+    case PyUnicode_FORMAT_UCS1:
+    case PyUnicode_FORMAT_UTF8:
+        assert(data[nbytes] == 0);
+        break;
+    case PyUnicode_FORMAT_UCS2:
+        assert(data[nbytes] == 0);
+        assert(data[nbytes+1] == 0);
+        break;
+    case PyUnicode_FORMAT_UCS4:
+        assert(data[nbytes] == 0);
+        assert(data[nbytes+1] == 0);
+        assert(data[nbytes+2] == 0);
+        assert(data[nbytes+3] == 0);
+        break;
+    }
+
+    assert(view.format != NULL);
+    PyObject *res = Py_BuildValue("y#Iis", data, nbytes,
+                                  (unsigned int)format,
+                                  (int)view.itemsize, view.format);
+    PyBuffer_Release(&view);
+    return res;
+}
+
+
+// Test PyUnicode_Import(): build a str from (bytes, format).
+static PyObject*
+unicode_import(PyObject *self, PyObject *args)
+{
+    unsigned int format;
+    Py_ssize_t nbytes;
+    const void *data;
+    if (PyArg_ParseTuple(args, "y#I", &data, &nbytes, &format)) {
+        return PyUnicode_Import(data, nbytes, format);
+    }
+    return NULL;
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"codec_incrementalencoder", codec_incrementalencoder,       METH_VARARGS},
     {"codec_incrementaldecoder", codec_incrementaldecoder,       METH_VARARGS},
@@ -1924,6 +1992,8 @@ static PyMethodDef TestMethods[] = {
     {"unicode_format",           unicode_format,                 METH_VARARGS},
     {"unicode_contains",         unicode_contains,               METH_VARARGS},
     {"unicode_isidentifier",     unicode_isidentifier,           METH_O},
+    {"unicode_export",           unicode_export,                 METH_VARARGS},
+    {"unicode_import",           unicode_import,                 METH_VARARGS},
     {NULL},
 };
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2494c989544ca01..fe34536ff86c797 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2332,6 +2332,222 @@ PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
 }
 
 
+static int
+unicode_export(PyObject *unicode, Py_buffer *view,
+               Py_ssize_t len, const void *buf,
+               int itemsize, const char *format, uint32_t internal_format)
+{
+    if (PyBuffer_FillInfo(view, unicode, (void*)buf, len,
+                          1, PyBUF_SIMPLE) < 0) {  // 1 = read-only
+        return -1;
+    }
+    view->itemsize = itemsize;
+    view->format = (char*)format;
+    view->internal = (void*)(uintptr_t)internal_format;  // PyUnicode_FORMAT_*
+    return 0;
+}
+
+
+// Fill 'view' with a read-only export of 'unicode' using the first matching
+// format of 'requested_formats'. Return 0, or -1 with an exception set.
+int
+PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view)
+{
+#if SIZEOF_INT == 4
+#  define BUFFER_UCS4 "I"
+#elif SIZEOF_LONG == 4
+#  define BUFFER_UCS4 "L"
+#else
+#  error "unable to find BUFFER_UCS4"
+#endif
+
+    if (!PyUnicode_Check(unicode)) {
+        PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
+        return -1;
+    }
+    Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
+
+    // Native ASCII
+    if (PyUnicode_IS_ASCII(unicode)
+        && (requested_formats & PyUnicode_FORMAT_ASCII))
+    {
+        return unicode_export(unicode, view,
+                              len, PyUnicode_1BYTE_DATA(unicode),
+                              1, "B", PyUnicode_FORMAT_ASCII);
+    }
+
+    // Native UCS1
+    int kind = PyUnicode_KIND(unicode);
+    if (kind == PyUnicode_1BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS1))
+    {
+        return unicode_export(unicode, view,
+                              len, PyUnicode_1BYTE_DATA(unicode),
+                              1, "B", PyUnicode_FORMAT_UCS1);
+    }
+
+    // Native UCS2
+    if (kind == PyUnicode_2BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS2))
+    {
+        return unicode_export(unicode, view,
+                              len, PyUnicode_2BYTE_DATA(unicode),
+                              2, "H", PyUnicode_FORMAT_UCS2);
+    }
+
+    // Convert ASCII or UCS1 to UCS2
+    if (kind == PyUnicode_1BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS2))
+    {
+        Py_UCS2 *ucs2 = PyMem_New(Py_UCS2, len + 1);  // +1: NUL terminator
+        if (!ucs2) {
+            PyErr_NoMemory();
+            return -1;
+        }
+
+        _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2,
+                                 PyUnicode_1BYTE_DATA(unicode),
+                                 PyUnicode_1BYTE_DATA(unicode) + len,
+                                 ucs2);
+        ucs2[len] = 0;
+        return unicode_export(unicode, view,
+                              len, ucs2,
+                              2, "H", PyUnicode_FORMAT_UCS2);
+    }
+
+    // Native UCS4
+    if (kind == PyUnicode_4BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS4))
+    {
+        return unicode_export(unicode, view,
+                              len, PyUnicode_4BYTE_DATA(unicode),
+                              4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
+    }
+
+    // Convert ASCII, UCS1 or UCS2 to UCS4
+    if (requested_formats & PyUnicode_FORMAT_UCS4) {
+        Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
+        if (ucs4 == NULL) {
+            return -1;
+        }
+        return unicode_export(unicode, view,
+                              len, ucs4,
+                              4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
+    }
+
+    // Encode UCS1, UCS2 or UCS4 to UTF-8
+    if (requested_formats & PyUnicode_FORMAT_UTF8) {
+        Py_ssize_t nbytes;
+        const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &nbytes);
+        if (utf8 == NULL) {
+            return -1;
+        }
+        return unicode_export(unicode, view,
+                              nbytes, utf8,
+                              1, "B", PyUnicode_FORMAT_UTF8);
+    }
+
+    PyErr_Format(PyExc_ValueError, "unable to find a matching export format");
+    return -1;
+
+#undef BUFFER_UCS4
+}
+
+
+// Retrieve the PyUnicode_FORMAT_* value stored in view->internal by an export.
+int
+PyUnicode_GetBufferFormat(const Py_buffer *view, uint32_t *format)
+{
+    PyObject *exporter = view->obj;
+    if (exporter == NULL || !PyUnicode_Check(exporter)) {
+        PyErr_SetString(PyExc_ValueError, "not a str export");
+        return -1;
+    }
+
+    uintptr_t stored = (uintptr_t)view->internal;
+    int known = (stored == PyUnicode_FORMAT_ASCII
+                 || stored == PyUnicode_FORMAT_UCS1
+                 || stored == PyUnicode_FORMAT_UCS2
+                 || stored == PyUnicode_FORMAT_UCS4
+                 || stored == PyUnicode_FORMAT_UTF8);
+    if (!known) {
+        PyErr_SetString(PyExc_ValueError, "invalid format");
+        return -1;
+    }
+    // Only write *format once the stored value has been validated
+    *format = (uint32_t)stored;
+    return 0;
+}
+
+
+// Release an export: free the buffer only if PyUnicode_Export() allocated it.
+static void
+unicode_releasebuffer(PyObject *unicode, Py_buffer *view)
+{
+    uintptr_t format = (uintptr_t)view->internal;
+    switch (format)
+    {
+    case PyUnicode_FORMAT_UCS2:
+        // Converted copy made by PyUnicode_Export() unless natively UCS2
+        if (PyUnicode_KIND(unicode) != PyUnicode_2BYTE_KIND) {
+            PyMem_Free(view->buf);
+        }
+        break;
+    case PyUnicode_FORMAT_UCS4:
+        // Converted copy made by PyUnicode_Export() unless natively UCS4
+        if (PyUnicode_KIND(unicode) != PyUnicode_4BYTE_KIND) {
+            PyMem_Free(view->buf);
+        }
+        break;
+    default:
+        // ASCII/UCS1/UTF8: nothing to free; ignore silently an unknown format
+        break;
+    }
+}
+
+// Create a str from 'nbytes' bytes of 'data' stored in the export 'format'.
+PyObject*
+PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format)
+{
+    if (nbytes < 0) {
+        PyErr_SetString(PyExc_ValueError, "Negative nbytes");
+        return NULL;
+    }
+
+    switch (format)
+    {
+    case PyUnicode_FORMAT_ASCII:
+        return PyUnicode_DecodeASCII((const char*)data, nbytes, NULL);
+
+    case PyUnicode_FORMAT_UCS1:
+        return _PyUnicode_FromUCS1(data, nbytes);
+
+    case PyUnicode_FORMAT_UCS2:
+        if (nbytes % 2) {
+            PyErr_Format(PyExc_ValueError, "nbytes must be a multiple of 2: %zd",
+                         nbytes);
+            return NULL;
+        }
+        return _PyUnicode_FromUCS2(data, nbytes / 2);
+
+    case PyUnicode_FORMAT_UCS4:
+        if (nbytes % 4) {
+            PyErr_Format(PyExc_ValueError, "nbytes must be a multiple of 4: %zd",
+                         nbytes);
+            return NULL;
+        }
+        return _PyUnicode_FromUCS4(data, nbytes / 4);
+
+    case PyUnicode_FORMAT_UTF8:
+        return PyUnicode_DecodeUTF8((const char*)data, nbytes, NULL);
+
+    default:
+        PyErr_Format(PyExc_ValueError, "unknown format: %u", format);
+        return NULL;
+    }
+}
+
+
 PyObject*
 PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
 {
@@ -15248,6 +15464,10 @@ errors defaults to 'strict'.");
 
 static PyObject *unicode_iter(PyObject *seq);
 
+static PyBufferProcs unicode_as_buffer = {
+     .bf_releasebuffer = unicode_releasebuffer,  // bf_getbuffer is left unset
+};
+
 PyTypeObject PyUnicode_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
     "str",                        /* tp_name */
@@ -15268,7 +15488,7 @@ PyTypeObject PyUnicode_Type = {
     (reprfunc) unicode_str,       /* tp_str */
     PyObject_GenericGetAttr,      /* tp_getattro */
     0,                            /* tp_setattro */
-    0,                            /* tp_as_buffer */
+    &unicode_as_buffer,           /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
         Py_TPFLAGS_UNICODE_SUBCLASS |
         _Py_TPFLAGS_MATCH_SELF, /* tp_flags */
diff --git a/PC/python3dll.c b/PC/python3dll.c
index 1845334b244d8c9..1bfa238eb7054d3 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -717,6 +717,7 @@ EXPORT_FUNC(PyUnicode_EncodeFSDefault)
 EXPORT_FUNC(PyUnicode_EncodeLocale)
 EXPORT_FUNC(PyUnicode_EqualToUTF8)
 EXPORT_FUNC(PyUnicode_EqualToUTF8AndSize)
+EXPORT_FUNC(PyUnicode_Export)
 EXPORT_FUNC(PyUnicode_Find)
 EXPORT_FUNC(PyUnicode_FindChar)
 EXPORT_FUNC(PyUnicode_Format)
@@ -730,9 +731,11 @@ EXPORT_FUNC(PyUnicode_FromStringAndSize)
 EXPORT_FUNC(PyUnicode_FromWideChar)
 EXPORT_FUNC(PyUnicode_FSConverter)
 EXPORT_FUNC(PyUnicode_FSDecoder)
+EXPORT_FUNC(PyUnicode_GetBufferFormat)
 EXPORT_FUNC(PyUnicode_GetDefaultEncoding)
 EXPORT_FUNC(PyUnicode_GetLength)
 EXPORT_FUNC(PyUnicode_GetSize)
+EXPORT_FUNC(PyUnicode_Import)
 EXPORT_FUNC(PyUnicode_InternFromString)
 EXPORT_FUNC(PyUnicode_InternImmortal)
 EXPORT_FUNC(PyUnicode_InternInPlace)