pandas-dev · jreback · Oct 14, 2017 · Oct 9, 2017 · gfyoung · Oct 12, 2017
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -956,6 +956,7 @@ I/O
 - Bug in :meth:`DataFrame.to_html` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`)
 - Bug in :meth:`DataFrame.to_html` in which there was no validation of the ``justify`` parameter (:issue:`17527`)
 - Bug in :func:`HDFStore.select` when reading a contiguous mixed-data table featuring VLArray (:issue:`17021`)
+- Bug in :func:`to_json` where several conditions (including objects with unprintable symbols, objects with deep recursion, overlong labels) caused segfaults instead of raising the appropriate exception (:issue:`14256`)
 
 Plotting
 ^^^^^^^^
@@ -1033,3 +1034,4 @@ Other
 ^^^^^
 - Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`)
 - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
+
diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h
@@ -307,4 +307,11 @@ EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec,
                                        const char *buffer, size_t cbBuffer);
 EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t);
 
+#define Buffer_Reserve(__enc, __len)                                  \
+    if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) { \
+        Buffer_Realloc((__enc), (__len));                             \
+    }
+
+void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded);
+
 #endif  // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_
diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c
@@ -714,11 +714,6 @@ int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc,
     }
 }
 
-#define Buffer_Reserve(__enc, __len)                                  \
-    if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) { \
-        Buffer_Realloc((__enc), (__len));                             \
-    }
-
 #define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr;
 
 FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin,
@@ -976,6 +971,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
             }
 
             enc->iterEnd(obj, &tc);
+            Buffer_Reserve(enc, 2);
             Buffer_AppendCharUnchecked(enc, ']');
             break;
         }
@@ -1003,6 +999,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
             }
 
             enc->iterEnd(obj, &tc);
+            Buffer_Reserve(enc, 2);
             Buffer_AppendCharUnchecked(enc, '}');
             break;
         }

diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -783,6 +783,7 @@ static void NpyArr_getLabel(JSOBJ obj, JSONTypeContext *tc, size_t *outLen,
     JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder;
     PRINTMARK();
     *outLen = strlen(labels[idx]);
+    Buffer_Reserve(enc, *outLen);
     memcpy(enc->offset, labels[idx], sizeof(char) * (*outLen));
     enc->offset += *outLen;
     *outLen = 0;
@@ -879,7 +880,7 @@ int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) {
     NpyArrContext *npyarr;
     PRINTMARK();
 
-    if (PyErr_Occurred()) {
+    if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) {
         return 0;
     }
 
@@ -1224,6 +1225,10 @@ int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) {
     PyObject *attrName;
     char *attrStr;
 
+    if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) {
+        return 0;
+    }
+
     if (itemValue) {
         Py_DECREF(GET_TC(tc)->itemValue);
         GET_TC(tc)->itemValue = itemValue = NULL;

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -511,6 +511,51 @@ def test_blocks_compat_GH9037(self):
                            by_blocks=True,
                            check_exact=True)
 
+    def test_frame_nonprintable_bytes(self):
+        # GH14256: failing column caused segfaults, if it is not the last one
+
+        class BinaryThing(object):
+
+            def __init__(self, hexed):
+                self.hexed = hexed
+                if compat.PY2:
+                    self.binary = hexed.decode('hex')
+                else:
+                    self.binary = bytes.fromhex(hexed)
+
+            def __str__(self):
+                return self.hexed
+
+        hexed = '574b4454ba8c5eb4f98a8f45'
+        binthing = BinaryThing(hexed)
+
+        # verify the proper conversion of printable content
+        df_printable = DataFrame({'A': [binthing.hexed]})
+        assert df_printable.to_json() == '{"A":{"0":"%s"}}' % hexed
+
+        # check if non-printable content throws appropriate Exception
+        df_nonprintable = DataFrame({'A': [binthing]})
+        with pytest.raises(OverflowError):
+            df_nonprintable.to_json()
+
+        # the same with multiple columns threw segfaults
+        df_mixed = DataFrame({'A': [binthing], 'B': [1]},
+                             columns=['A', 'B'])
+        with pytest.raises(OverflowError):
+            df_mixed.to_json()
+
+        # default_handler should resolve exceptions for non-string types
+        assert df_nonprintable.to_json(default_handler=str) == \
+            '{"A":{"0":"%s"}}' % hexed
+        assert df_mixed.to_json(default_handler=str) == \
+            '{"A":{"0":"%s"},"B":{"0":1}}' % hexed
+
+    def test_label_overflow(self):
+        # GH14256: buffer length not checked when writing label
+        df = pd.DataFrame({'foo': [1337], 'bar' * 100000: [1]})
+        assert df.to_json() == \
+            '{"%s":{"0":1},"foo":{"0":1337}}' % ('bar' * 100000)
+
     def test_series_non_unique_index(self):
         s = Series(['a', 'b'], index=[1, 1])