apache · wesm · Apr 4, 2017 · Apr 4, 2017 · jeffknupp · Apr 4, 2017
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
@@ -394,7 +394,7 @@ class BytesConverter : public TypedConverter<BinaryBuilder> {
       } else if (PyBytes_Check(item)) {
         bytes_obj = item;
       } else {
-        return Status::TypeError(
+        return Status::Invalid(
             "Value that cannot be converted to bytes was encountered");
       }
       // No error checking
@@ -429,7 +429,7 @@ class FixedWidthBytesConverter : public TypedConverter<FixedSizeBinaryBuilder> {
       } else if (PyBytes_Check(item)) {
         bytes_obj = item;
       } else {
-        return Status::TypeError(
+        return Status::Invalid(
             "Value that cannot be converted to bytes was encountered");
       }
       // No error checking
@@ -458,7 +458,7 @@ class UTF8Converter : public TypedConverter<StringBuilder> {
         RETURN_NOT_OK(typed_builder_->AppendNull());
         continue;
       } else if (!PyUnicode_Check(item)) {
-        return Status::TypeError("Non-unicode value encountered");
+        return Status::Invalid("Non-unicode value encountered");
       }
       tmp.reset(PyUnicode_AsUTF8String(item));
       RETURN_IF_PYERROR();
@@ -585,7 +585,7 @@ Status CheckPythonBytesAreFixedLength(PyObject* obj, Py_ssize_t expected_length)
     std::stringstream ss;
     ss << "Found byte string of length " << length << ", expected length is "
        << expected_length;
-    return Status::TypeError(ss.str());
+    return Status::Invalid(ss.str());
   }
   return Status::OK();
 }

diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
@@ -161,7 +161,7 @@ static Status AppendObjectStrings(
       obj = PyUnicode_AsUTF8String(obj);
       if (obj == NULL) {
         PyErr_Clear();
-        return Status::TypeError("failed converting unicode to UTF8");
+        return Status::Invalid("failed converting unicode to UTF8");
       }
       const int32_t length = static_cast<int32_t>(PyBytes_GET_SIZE(obj));
       Status s = builder->Append(PyBytes_AS_STRING(obj), length);
@@ -200,7 +200,7 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas
       obj = PyUnicode_AsUTF8String(obj);
       if (obj == NULL) {
         PyErr_Clear();
-        return Status::TypeError("failed converting unicode to UTF8");
+        return Status::Invalid("failed converting unicode to UTF8");
       }
 
       RETURN_NOT_OK(CheckPythonBytesAreFixedLength(obj, byte_width));
@@ -482,7 +482,7 @@ Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) {
   std::stringstream ss;
   ss << "Python object of type " << cpp_type_name << " is not None and is not a "
      << expected_type_name << " object";
-  return Status::TypeError(ss.str());
+  return Status::Invalid(ss.str());
 }
 
 Status PandasConverter::ConvertDates() {

diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
@@ -134,7 +134,7 @@ class ARROW_EXPORT Status {
   bool IsKeyError() const { return code() == StatusCode::KeyError; }
   bool IsInvalid() const { return code() == StatusCode::Invalid; }
   bool IsIOError() const { return code() == StatusCode::IOError; }
-
+  bool IsTypeError() const { return code() == StatusCode::TypeError; }
   bool IsUnknownError() const { return code() == StatusCode::UnknownError; }
   bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; }
 

diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
@@ -38,7 +38,13 @@
                            ListArray, StringArray,
                            DictionaryArray)
 
-from pyarrow.error import ArrowException
+from pyarrow.error import (ArrowException,
+                           ArrowKeyError,
+                           ArrowInvalid,
+                           ArrowIOError,
+                           ArrowMemoryError,
+                           ArrowNotImplementedError,
+                           ArrowTypeError)
 
 from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem
 from pyarrow.io import (HdfsFile, NativeFile, PythonFileInterface,

diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx
@@ -19,13 +19,52 @@ from pyarrow.includes.libarrow cimport CStatus
 from pyarrow.includes.common cimport c_string
 from pyarrow.compat import frombytes
 
+
 class ArrowException(Exception):
     pass
 
+
+class ArrowInvalid(ValueError, ArrowException):
+    pass
+
+
+class ArrowMemoryError(MemoryError, ArrowException):
+    pass
+
+
+class ArrowIOError(IOError, ArrowException):
+    pass
+
+
+class ArrowKeyError(KeyError, ArrowException):
+    pass
+
+
+class ArrowTypeError(TypeError, ArrowException):
+    pass
+
+
+class ArrowNotImplementedError(NotImplementedError, ArrowException):
+    pass
+
+
 cdef int check_status(const CStatus& status) nogil except -1:
     if status.ok():
         return 0
 
-    cdef c_string c_message = status.ToString()
     with gil:
-        raise ArrowException(frombytes(c_message))
+        message = frombytes(status.ToString())
+        if status.IsInvalid():
+            raise ArrowInvalid(message)
+        elif status.IsIOError():
+            raise ArrowIOError(message)
+        elif status.IsOutOfMemory():
+            raise ArrowMemoryError(message)
+        elif status.IsKeyError():
+            raise ArrowKeyError(message)
+        elif status.IsNotImplemented():
+            raise ArrowNotImplementedError(message)
+        elif status.IsTypeError():
+            raise ArrowTypeError(message)
+        else:
+            raise ArrowException(message)
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
@@ -43,10 +43,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         c_string ToString()
 
         c_bool ok()
+        c_bool IsIOError()
         c_bool IsOutOfMemory()
+        c_bool IsInvalid()
         c_bool IsKeyError()
         c_bool IsNotImplemented()
-        c_bool IsInvalid()
+        c_bool IsTypeError()
 
 
 cdef inline object PyObject_to_object(PyObject* o):

diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
@@ -17,7 +17,7 @@
 # under the License.
 
 from pyarrow.compat import unittest, u  # noqa
-import pyarrow
+import pyarrow as pa
 
 import datetime
 
@@ -26,32 +26,32 @@ class TestConvertList(unittest.TestCase):
 
     def test_boolean(self):
         expected = [True, None, False, None]
-        arr = pyarrow.from_pylist(expected)
+        arr = pa.from_pylist(expected)
         assert len(arr) == 4
         assert arr.null_count == 2
-        assert arr.type == pyarrow.bool_()
+        assert arr.type == pa.bool_()
         assert arr.to_pylist() == expected
 
     def test_empty_list(self):
-        arr = pyarrow.from_pylist([])
+        arr = pa.from_pylist([])
         assert len(arr) == 0
         assert arr.null_count == 0
-        assert arr.type == pyarrow.null()
+        assert arr.type == pa.null()
         assert arr.to_pylist() == []
 
     def test_all_none(self):
-        arr = pyarrow.from_pylist([None, None])
+        arr = pa.from_pylist([None, None])
         assert len(arr) == 2
         assert arr.null_count == 2
-        assert arr.type == pyarrow.null()
+        assert arr.type == pa.null()
         assert arr.to_pylist() == [None, None]
 
     def test_integer(self):
         expected = [1, None, 3, None]
-        arr = pyarrow.from_pylist(expected)
+        arr = pa.from_pylist(expected)
         assert len(arr) == 4
         assert arr.null_count == 2
-        assert arr.type == pyarrow.int64()
+        assert arr.type == pa.int64()
         assert arr.to_pylist() == expected
 
     def test_garbage_collection(self):
@@ -60,57 +60,57 @@ def test_garbage_collection(self):
         # Force the cyclic garbage collector to run
         gc.collect()
 
-        bytes_before = pyarrow.total_allocated_bytes()
-        pyarrow.from_pylist([1, None, 3, None])
+        bytes_before = pa.total_allocated_bytes()
+        pa.from_pylist([1, None, 3, None])
         gc.collect()
-        assert pyarrow.total_allocated_bytes() == bytes_before
+        assert pa.total_allocated_bytes() == bytes_before
 
     def test_double(self):
         data = [1.5, 1, None, 2.5, None, None]
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 6
         assert arr.null_count == 3
-        assert arr.type == pyarrow.float64()
+        assert arr.type == pa.float64()
         assert arr.to_pylist() == data
 
     def test_unicode(self):
         data = [u'foo', u'bar', None, u'mañana']
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 4
         assert arr.null_count == 1
-        assert arr.type == pyarrow.string()
+        assert arr.type == pa.string()
         assert arr.to_pylist() == data
 
     def test_bytes(self):
         u1 = b'ma\xc3\xb1ana'
         data = [b'foo',
                 u1.decode('utf-8'),  # unicode gets encoded,
                 None]
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 3
         assert arr.null_count == 1
-        assert arr.type == pyarrow.binary()
+        assert arr.type == pa.binary()
         assert arr.to_pylist() == [b'foo', u1, None]
 
     def test_fixed_size_bytes(self):
         data = [b'foof', None, b'barb', b'2346']
-        arr = pyarrow.from_pylist(data, type=pyarrow.binary(4))
+        arr = pa.from_pylist(data, type=pa.binary(4))
         assert len(arr) == 4
         assert arr.null_count == 1
-        assert arr.type == pyarrow.binary(4)
+        assert arr.type == pa.binary(4)
         assert arr.to_pylist() == data
 
     def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
         data = [b'foo', None, b'barb', b'2346']
-        with self.assertRaises(pyarrow.error.ArrowException):
-            pyarrow.from_pylist(data, type=pyarrow.binary(4))
+        with self.assertRaises(pa.ArrowInvalid):
+            pa.from_pylist(data, type=pa.binary(4))
 
     def test_date(self):
         data = [datetime.date(2000, 1, 1), None, datetime.date(1970, 1, 1),
                 datetime.date(2040, 2, 26)]
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 4
-        assert arr.type == pyarrow.date64()
+        assert arr.type == pa.date64()
         assert arr.null_count == 1
         assert arr[0].as_py() == datetime.date(2000, 1, 1)
         assert arr[1].as_py() is None
@@ -124,9 +124,9 @@ def test_timestamp(self):
             datetime.datetime(2006, 1, 13, 12, 34, 56, 432539),
             datetime.datetime(2010, 8, 13, 5, 46, 57, 437699)
         ]
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 4
-        assert arr.type == pyarrow.timestamp('us')
+        assert arr.type == pa.timestamp('us')
         assert arr.null_count == 1
         assert arr[0].as_py() == datetime.datetime(2007, 7, 13, 1,
                                                    23, 34, 123456)
@@ -137,28 +137,28 @@ def test_timestamp(self):
                                                    46, 57, 437699)
 
     def test_mixed_nesting_levels(self):
-        pyarrow.from_pylist([1, 2, None])
-        pyarrow.from_pylist([[1], [2], None])
-        pyarrow.from_pylist([[1], [2], [None]])
+        pa.from_pylist([1, 2, None])
+        pa.from_pylist([[1], [2], None])
+        pa.from_pylist([[1], [2], [None]])
 
-        with self.assertRaises(pyarrow.ArrowException):
-            pyarrow.from_pylist([1, 2, [1]])
+        with self.assertRaises(pa.ArrowInvalid):
+            pa.from_pylist([1, 2, [1]])
 
-        with self.assertRaises(pyarrow.ArrowException):
-            pyarrow.from_pylist([1, 2, []])
+        with self.assertRaises(pa.ArrowInvalid):
+            pa.from_pylist([1, 2, []])
 
-        with self.assertRaises(pyarrow.ArrowException):
-            pyarrow.from_pylist([[1], [2], [None, [1]]])
+        with self.assertRaises(pa.ArrowInvalid):
+            pa.from_pylist([[1], [2], [None, [1]]])
 
     def test_list_of_int(self):
         data = [[1, 2, 3], [], None, [1, 2]]
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 4
         assert arr.null_count == 1
-        assert arr.type == pyarrow.list_(pyarrow.int64())
+        assert arr.type == pa.list_(pa.int64())
         assert arr.to_pylist() == data
 
     def test_mixed_types_fails(self):
         data = ['a', 1, 2.0]
-        with self.assertRaises(pyarrow.error.ArrowException):
-            pyarrow.from_pylist(data)
+        with self.assertRaises(pa.ArrowException):
+            pa.from_pylist(data)
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
@@ -266,7 +266,7 @@ def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
         values = [b'foo', None, b'ba', None, None, b'hey']
         df = pd.DataFrame({'strings': values})
         schema = A.Schema.from_fields([A.field('strings', A.binary(3))])
-        with self.assertRaises(A.error.ArrowException):
+        with self.assertRaises(A.ArrowInvalid):
             A.Table.from_pandas(df, schema=schema)
 
     def test_timestamps_notimezone_no_nulls(self):
@@ -409,7 +409,7 @@ def test_category(self):
 
     def test_mixed_types_fails(self):
         data = pd.DataFrame({'a': ['a', 1, 2.0]})
-        with self.assertRaises(A.error.ArrowException):
+        with self.assertRaises(A.ArrowException):
             A.Table.from_pandas(data)
 
     def test_strided_data_import(self):

diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
@@ -45,7 +45,7 @@ def tearDown(self):
                 pass
 
     def test_file_not_exist(self):
-        with self.assertRaises(pa.ArrowException):
+        with self.assertRaises(pa.ArrowIOError):
             FeatherReader('test_invalid_file')
 
     def _get_null_counts(self, path, columns=None):