Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cpp/src/arrow/python/builtin_convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ class BytesConverter : public TypedConverter<BinaryBuilder> {
} else if (PyBytes_Check(item)) {
bytes_obj = item;
} else {
return Status::TypeError(
return Status::Invalid(
"Value that cannot be converted to bytes was encountered");
}
// No error checking
Expand Down Expand Up @@ -429,7 +429,7 @@ class FixedWidthBytesConverter : public TypedConverter<FixedSizeBinaryBuilder> {
} else if (PyBytes_Check(item)) {
bytes_obj = item;
} else {
return Status::TypeError(
return Status::Invalid(
"Value that cannot be converted to bytes was encountered");
}
// No error checking
Expand Down Expand Up @@ -458,7 +458,7 @@ class UTF8Converter : public TypedConverter<StringBuilder> {
RETURN_NOT_OK(typed_builder_->AppendNull());
continue;
} else if (!PyUnicode_Check(item)) {
return Status::TypeError("Non-unicode value encountered");
return Status::Invalid("Non-unicode value encountered");
}
tmp.reset(PyUnicode_AsUTF8String(item));
RETURN_IF_PYERROR();
Expand Down Expand Up @@ -585,7 +585,7 @@ Status CheckPythonBytesAreFixedLength(PyObject* obj, Py_ssize_t expected_length)
std::stringstream ss;
ss << "Found byte string of length " << length << ", expected length is "
<< expected_length;
return Status::TypeError(ss.str());
return Status::Invalid(ss.str());
}
return Status::OK();
}
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/python/pandas_convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ static Status AppendObjectStrings(
obj = PyUnicode_AsUTF8String(obj);
if (obj == NULL) {
PyErr_Clear();
return Status::TypeError("failed converting unicode to UTF8");
return Status::Invalid("failed converting unicode to UTF8");
}
const int32_t length = static_cast<int32_t>(PyBytes_GET_SIZE(obj));
Status s = builder->Append(PyBytes_AS_STRING(obj), length);
Expand Down Expand Up @@ -200,7 +200,7 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas
obj = PyUnicode_AsUTF8String(obj);
if (obj == NULL) {
PyErr_Clear();
return Status::TypeError("failed converting unicode to UTF8");
return Status::Invalid("failed converting unicode to UTF8");
}

RETURN_NOT_OK(CheckPythonBytesAreFixedLength(obj, byte_width));
Expand Down Expand Up @@ -482,7 +482,7 @@ Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) {
std::stringstream ss;
ss << "Python object of type " << cpp_type_name << " is not None and is not a "
<< expected_type_name << " object";
return Status::TypeError(ss.str());
return Status::Invalid(ss.str());
}

Status PandasConverter::ConvertDates() {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class ARROW_EXPORT Status {
bool IsKeyError() const { return code() == StatusCode::KeyError; }
bool IsInvalid() const { return code() == StatusCode::Invalid; }
bool IsIOError() const { return code() == StatusCode::IOError; }

bool IsTypeError() const { return code() == StatusCode::TypeError; }
bool IsUnknownError() const { return code() == StatusCode::UnknownError; }
bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; }

Expand Down
8 changes: 7 additions & 1 deletion python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@
ListArray, StringArray,
DictionaryArray)

from pyarrow.error import ArrowException
from pyarrow.error import (ArrowException,
ArrowKeyError,
ArrowInvalid,
ArrowIOError,
ArrowMemoryError,
ArrowNotImplementedError,
ArrowTypeError)

from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem
from pyarrow.io import (HdfsFile, NativeFile, PythonFileInterface,
Expand Down
43 changes: 41 additions & 2 deletions python/pyarrow/error.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,52 @@ from pyarrow.includes.libarrow cimport CStatus
from pyarrow.includes.common cimport c_string
from pyarrow.compat import frombytes


class ArrowException(Exception):
pass


class ArrowInvalid(ValueError, ArrowException):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this an antipattern (deriving from a built-in exception or, worse, raising them directly)? If a user combines some Arrow code with their own in a try block, they may inadvertently catch, say, an ArrowInvalid Exception when they explicitly were trying to handle a ValueError for non-arrow code in the try block.

tl;dr if you structure your Exceptions like this, client code has no idea from where the issue originated and has no easy way of disambiguating in the except block (if they're even aware of the Arrow Exception class hierarchy to begin with).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it an antipattern? I have never thought there was anything wrong either with

  • deriving from built-in exceptions, like ValueError
  • raising built-in exceptions directly (pretty much all Python data libraries -- pandas, NumPy, scikit-learn, etc. already do this)

So there's a couple ways to go:

  • Don't define user-defined exceptions, raise built-in exceptions only
  • Don't derive from a base ArrowException -- which is there to help indicate that the error arose in C++ code
  • Derive from base Exception (worst option, IMHO)

adding @jreback for more opinions (I did this at his asking)

Copy link
Contributor

@jreback jreback Apr 4, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my main concern was to NOT have the arrow exception hierarchy just be based on Exception. Being able to catch ValueError, TypeError, KeyError, IndexError (subclass) as errors is very pythonic.

As for the naming having ArrowTypeError(ArrowException, TypeError). I am not how useful that actually is in practice. I would say if you have a need to differentiate between two (or more TypeError s), then having a custom exception is useful (as opposed to having to check the actual error message).

So fine with this PR (though maybe raise the standard python exceptions, unless you have a real need for them).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My preference is to only raise these exceptions for errors that originate from the C++ libraries, otherwise use the Python built-in ones. Sound good?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sgtm.

pass


class ArrowMemoryError(MemoryError, ArrowException):
pass


class ArrowIOError(IOError, ArrowException):
pass


class ArrowKeyError(KeyError, ArrowException):
pass


class ArrowTypeError(TypeError, ArrowException):
pass


class ArrowNotImplementedError(NotImplementedError, ArrowException):
pass


cdef int check_status(const CStatus& status) nogil except -1:
if status.ok():
return 0

cdef c_string c_message = status.ToString()
with gil:
raise ArrowException(frombytes(c_message))
message = frombytes(status.ToString())
if status.IsInvalid():
raise ArrowInvalid(message)
elif status.IsIOError():
raise ArrowIOError(message)
elif status.IsOutOfMemory():
raise ArrowMemoryError(message)
elif status.IsKeyError():
raise ArrowKeyError(message)
elif status.IsNotImplemented():
raise ArrowNotImplementedError(message)
elif status.IsTypeError():
raise ArrowTypeError(message)
else:
raise ArrowException(message)
4 changes: 3 additions & 1 deletion python/pyarrow/includes/common.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_string ToString()

c_bool ok()
c_bool IsIOError()
c_bool IsOutOfMemory()
c_bool IsInvalid()
c_bool IsKeyError()
c_bool IsNotImplemented()
c_bool IsInvalid()
c_bool IsTypeError()


cdef inline object PyObject_to_object(PyObject* o):
Expand Down
78 changes: 39 additions & 39 deletions python/pyarrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# under the License.

from pyarrow.compat import unittest, u # noqa
import pyarrow
import pyarrow as pa

import datetime

Expand All @@ -26,32 +26,32 @@ class TestConvertList(unittest.TestCase):

def test_boolean(self):
expected = [True, None, False, None]
arr = pyarrow.from_pylist(expected)
arr = pa.from_pylist(expected)
assert len(arr) == 4
assert arr.null_count == 2
assert arr.type == pyarrow.bool_()
assert arr.type == pa.bool_()
assert arr.to_pylist() == expected

def test_empty_list(self):
arr = pyarrow.from_pylist([])
arr = pa.from_pylist([])
assert len(arr) == 0
assert arr.null_count == 0
assert arr.type == pyarrow.null()
assert arr.type == pa.null()
assert arr.to_pylist() == []

def test_all_none(self):
arr = pyarrow.from_pylist([None, None])
arr = pa.from_pylist([None, None])
assert len(arr) == 2
assert arr.null_count == 2
assert arr.type == pyarrow.null()
assert arr.type == pa.null()
assert arr.to_pylist() == [None, None]

def test_integer(self):
expected = [1, None, 3, None]
arr = pyarrow.from_pylist(expected)
arr = pa.from_pylist(expected)
assert len(arr) == 4
assert arr.null_count == 2
assert arr.type == pyarrow.int64()
assert arr.type == pa.int64()
assert arr.to_pylist() == expected

def test_garbage_collection(self):
Expand All @@ -60,57 +60,57 @@ def test_garbage_collection(self):
# Force the cyclic garbage collector to run
gc.collect()

bytes_before = pyarrow.total_allocated_bytes()
pyarrow.from_pylist([1, None, 3, None])
bytes_before = pa.total_allocated_bytes()
pa.from_pylist([1, None, 3, None])
gc.collect()
assert pyarrow.total_allocated_bytes() == bytes_before
assert pa.total_allocated_bytes() == bytes_before

def test_double(self):
data = [1.5, 1, None, 2.5, None, None]
arr = pyarrow.from_pylist(data)
arr = pa.from_pylist(data)
assert len(arr) == 6
assert arr.null_count == 3
assert arr.type == pyarrow.float64()
assert arr.type == pa.float64()
assert arr.to_pylist() == data

def test_unicode(self):
data = [u'foo', u'bar', None, u'mañana']
arr = pyarrow.from_pylist(data)
arr = pa.from_pylist(data)
assert len(arr) == 4
assert arr.null_count == 1
assert arr.type == pyarrow.string()
assert arr.type == pa.string()
assert arr.to_pylist() == data

def test_bytes(self):
u1 = b'ma\xc3\xb1ana'
data = [b'foo',
u1.decode('utf-8'), # unicode gets encoded,
None]
arr = pyarrow.from_pylist(data)
arr = pa.from_pylist(data)
assert len(arr) == 3
assert arr.null_count == 1
assert arr.type == pyarrow.binary()
assert arr.type == pa.binary()
assert arr.to_pylist() == [b'foo', u1, None]

def test_fixed_size_bytes(self):
data = [b'foof', None, b'barb', b'2346']
arr = pyarrow.from_pylist(data, type=pyarrow.binary(4))
arr = pa.from_pylist(data, type=pa.binary(4))
assert len(arr) == 4
assert arr.null_count == 1
assert arr.type == pyarrow.binary(4)
assert arr.type == pa.binary(4)
assert arr.to_pylist() == data

def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
data = [b'foo', None, b'barb', b'2346']
with self.assertRaises(pyarrow.error.ArrowException):
pyarrow.from_pylist(data, type=pyarrow.binary(4))
with self.assertRaises(pa.ArrowInvalid):
pa.from_pylist(data, type=pa.binary(4))

def test_date(self):
data = [datetime.date(2000, 1, 1), None, datetime.date(1970, 1, 1),
datetime.date(2040, 2, 26)]
arr = pyarrow.from_pylist(data)
arr = pa.from_pylist(data)
assert len(arr) == 4
assert arr.type == pyarrow.date64()
assert arr.type == pa.date64()
assert arr.null_count == 1
assert arr[0].as_py() == datetime.date(2000, 1, 1)
assert arr[1].as_py() is None
Expand All @@ -124,9 +124,9 @@ def test_timestamp(self):
datetime.datetime(2006, 1, 13, 12, 34, 56, 432539),
datetime.datetime(2010, 8, 13, 5, 46, 57, 437699)
]
arr = pyarrow.from_pylist(data)
arr = pa.from_pylist(data)
assert len(arr) == 4
assert arr.type == pyarrow.timestamp('us')
assert arr.type == pa.timestamp('us')
assert arr.null_count == 1
assert arr[0].as_py() == datetime.datetime(2007, 7, 13, 1,
23, 34, 123456)
Expand All @@ -137,28 +137,28 @@ def test_timestamp(self):
46, 57, 437699)

def test_mixed_nesting_levels(self):
pyarrow.from_pylist([1, 2, None])
pyarrow.from_pylist([[1], [2], None])
pyarrow.from_pylist([[1], [2], [None]])
pa.from_pylist([1, 2, None])
pa.from_pylist([[1], [2], None])
pa.from_pylist([[1], [2], [None]])

with self.assertRaises(pyarrow.ArrowException):
pyarrow.from_pylist([1, 2, [1]])
with self.assertRaises(pa.ArrowInvalid):
pa.from_pylist([1, 2, [1]])

with self.assertRaises(pyarrow.ArrowException):
pyarrow.from_pylist([1, 2, []])
with self.assertRaises(pa.ArrowInvalid):
pa.from_pylist([1, 2, []])

with self.assertRaises(pyarrow.ArrowException):
pyarrow.from_pylist([[1], [2], [None, [1]]])
with self.assertRaises(pa.ArrowInvalid):
pa.from_pylist([[1], [2], [None, [1]]])

def test_list_of_int(self):
data = [[1, 2, 3], [], None, [1, 2]]
arr = pyarrow.from_pylist(data)
arr = pa.from_pylist(data)
assert len(arr) == 4
assert arr.null_count == 1
assert arr.type == pyarrow.list_(pyarrow.int64())
assert arr.type == pa.list_(pa.int64())
assert arr.to_pylist() == data

def test_mixed_types_fails(self):
data = ['a', 1, 2.0]
with self.assertRaises(pyarrow.error.ArrowException):
pyarrow.from_pylist(data)
with self.assertRaises(pa.ArrowException):
pa.from_pylist(data)
4 changes: 2 additions & 2 deletions python/pyarrow/tests/test_convert_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
values = [b'foo', None, b'ba', None, None, b'hey']
df = pd.DataFrame({'strings': values})
schema = A.Schema.from_fields([A.field('strings', A.binary(3))])
with self.assertRaises(A.error.ArrowException):
with self.assertRaises(A.ArrowInvalid):
A.Table.from_pandas(df, schema=schema)

def test_timestamps_notimezone_no_nulls(self):
Expand Down Expand Up @@ -409,7 +409,7 @@ def test_category(self):

def test_mixed_types_fails(self):
data = pd.DataFrame({'a': ['a', 1, 2.0]})
with self.assertRaises(A.error.ArrowException):
with self.assertRaises(A.ArrowException):
A.Table.from_pandas(data)

def test_strided_data_import(self):
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def tearDown(self):
pass

def test_file_not_exist(self):
with self.assertRaises(pa.ArrowException):
with self.assertRaises(pa.ArrowIOError):
FeatherReader('test_invalid_file')

def _get_null_counts(self, path, columns=None):
Expand Down