python · encukou · Mar 11, 2020 · Mar 11, 2020 · Mar 11, 2020 · skrah
diff --git a/Lib/test/test_buffer.py b/Lib/test/test_buffer.py
@@ -2754,54 +2754,56 @@ def test_memoryview_cast_1D_ND(self):
         # be 1D, at least one format must be 'c', 'b' or 'B'.
         for _tshape in gencastshapes():
             for char in fmtdict['@']:
-                tfmt = ('', '@')[randrange(2)] + char
-                tsize = struct.calcsize(tfmt)
-                n = prod(_tshape) * tsize
-                obj = 'memoryview' if is_byte_format(tfmt) else 'bytefmt'
-                for fmt, items, _ in iter_format(n, obj):
-                    size = struct.calcsize(fmt)
-                    shape = [n] if n > 0 else []
-                    tshape = _tshape + [size]
-
-                    ex = ndarray(items, shape=shape, format=fmt)
-                    m = memoryview(ex)
-
-                    titems, tshape = cast_items(ex, tfmt, tsize, shape=tshape)
-
-                    if titems is None:
-                        self.assertRaises(TypeError, m.cast, tfmt, tshape)
-                        continue
-                    if titems == 'nan':
-                        continue # NaNs in lists are a recipe for trouble.
-
-                    # 1D -> ND
-                    nd = ndarray(titems, shape=tshape, format=tfmt)
-
-                    m2 = m.cast(tfmt, shape=tshape)
-                    ndim = len(tshape)
-                    strides = nd.strides
-                    lst = nd.tolist()
-                    self.verify(m2, obj=ex,
-                                itemsize=tsize, fmt=tfmt, readonly=True,
-                                ndim=ndim, shape=tshape, strides=strides,
-                                lst=lst, cast=True)
-
-                    # ND -> 1D
-                    m3 = m2.cast(fmt)
-                    m4 = m2.cast(fmt, shape=shape)
-                    ndim = len(shape)
-                    strides = ex.strides
-                    lst = ex.tolist()
-
-                    self.verify(m3, obj=ex,
-                                itemsize=size, fmt=fmt, readonly=True,
-                                ndim=ndim, shape=shape, strides=strides,
-                                lst=lst, cast=True)
+                with self.subTest(_tshape=_tshape, char=char):
+                    tfmt = ('', '@')[randrange(2)] + char
+                    tsize = struct.calcsize(tfmt)
+                    n = prod(_tshape) * tsize
+                    obj = 'memoryview' if is_byte_format(tfmt) else 'bytefmt'
+                    for fmt, items, _ in iter_format(n, obj):
+                        size = struct.calcsize(fmt)
+                        shape = [n] if n > 0 else []
+                        tshape = _tshape + [size]
 
-                    self.verify(m4, obj=ex,
-                                itemsize=size, fmt=fmt, readonly=True,
-                                ndim=ndim, shape=shape, strides=strides,
-                                lst=lst, cast=True)
+                        ex = ndarray(items, shape=shape, format=fmt)
+                        m = memoryview(ex)
+
+                        titems, tshape = cast_items(ex, tfmt, tsize,
+                                                    shape=tshape)
+
+                        if titems is None:
+                            self.assertRaises(TypeError, m.cast, tfmt, tshape)
+                            continue
+                        if titems == 'nan':
+                            continue # NaNs in lists are a recipe for trouble.
+
+                        # 1D -> ND
+                        nd = ndarray(titems, shape=tshape, format=tfmt)
+
+                        m2 = m.cast(tfmt, shape=tshape)
+                        ndim = len(tshape)
+                        strides = nd.strides
+                        lst = nd.tolist()
+                        self.verify(m2, obj=ex,
+                                    itemsize=tsize, fmt=tfmt, readonly=True,
+                                    ndim=ndim, shape=tshape, strides=strides,
+                                    lst=lst, cast=True)
+
+                        # ND -> 1D
+                        m3 = m2.cast(fmt)
+                        m4 = m2.cast(fmt, shape=shape)
+                        ndim = len(shape)
+                        strides = ex.strides
+                        lst = ex.tolist()
+
+                        self.verify(m3, obj=ex,
+                                    itemsize=size, fmt=fmt, readonly=True,
+                                    ndim=ndim, shape=shape, strides=strides,
+                                    lst=lst, cast=True)
+
+                        self.verify(m4, obj=ex,
+                                    itemsize=size, fmt=fmt, readonly=True,
+                                    ndim=ndim, shape=shape, strides=strides,
+                                    lst=lst, cast=True)
 
         if ctypes:
             # format: "T{>l:x:>d:y:}"

diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py
@@ -516,8 +516,18 @@ def __bool__(self):
                 self.fail("Expected OSError: struct.pack(%r, "
                           "ExplodingBool())" % (prefix + '?'))
 
-        for c in [b'\x01', b'\x7f', b'\xff', b'\x0f', b'\xf0']:
-            self.assertTrue(struct.unpack('>?', c)[0])
+            # To avoid undefined behavior in the C code, we assume that in
+            # every mode, the size is 1, '\x00' is false, and '\x01' is true.
+            # If there is a platform where this is not the case, the unpack
+            # code and the tests below will need adjusting. See bpo-39689.
+
+            self.assertEqual(packedFalse, b'\0' * len(false))
+            self.assertEqual(packedTrue, b'\x01' * len(true))
+
+            self.assertFalse(struct.unpack(prefix + '?', b'\0')[0])
+
+            for c in [b'\x01', b'\x7f', b'\xff', b'\x0f', b'\xf0']:
+                self.assertTrue(struct.unpack(prefix + '?', c)[0])
 
     def test_count_overflow(self):
         hugecount = '{}b'.format(sys.maxsize+1)

diff --git a/Misc/NEWS.d/next/Library/2020-03-11-15-46-59.bpo-39689.uzQhJX.rst b/Misc/NEWS.d/next/Library/2020-03-11-15-46-59.bpo-39689.uzQhJX.rst
@@ -0,0 +1,3 @@
+The struct module and memoryview now load native booleans as ``char`` rather
+than ``_Bool`` to avoid triggering undefined behavior. For valid native
+``_Bool`` values, the behavior is unchanged on all supported (tested) platforms.
diff --git a/Modules/_struct.c b/Modules/_struct.c
@@ -482,9 +482,18 @@ nu_ulonglong(const char *p, const formatdef *f)
 static PyObject *
 nu_bool(const char *p, const formatdef *f)
 {
-    _Bool x;
-    memcpy((char *)&x, p, sizeof x);
-    return PyBool_FromLong(x != 0);
+    /* The usual thing to do here is to memcpy *p to a _Bool variable.
+     * However, that would expose C undefined behavior to Python code:
+     * any bit pattern other than 0 or 1 would trigger UB.
+     * So we instead cast *p from char to _Bool, as in bu_bool.
+     *
+     * We assume, and assert, that sizeof(_Bool) is 1.
+     * We also assume the bit-pattern for (_Bool)0 is the same as for (char)0;
+     * this is covered by tests.
+     * See bpo-39689.
+     */
+    assert(sizeof(_Bool) == sizeof(char));
+    return PyBool_FromLong((_Bool)*p != 0);
 }
 
 

diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c
@@ -1699,7 +1699,9 @@ unpack_single(const char *ptr, const char *fmt)
     case 'l': UNPACK_SINGLE(ld, ptr, long); goto convert_ld;
 
     /* boolean */
-    case '?': UNPACK_SINGLE(ld, ptr, _Bool); goto convert_bool;
+    // memcpy-ing values other than 0 or 1 to a _Bool variable triggers
+    // undefined behavior, so cast from char instead. See bpo-39689.
+    case '?': ld = (_Bool)*ptr; goto convert_bool;
 
     /* unsigned integers */
     case 'H': UNPACK_SINGLE(lu, ptr, unsigned short); goto convert_lu;