ARROW-2432: [Python] Fix Pandas decimal type conversion with None values #1878

Closed
9 changes: 5 additions & 4 deletions cpp/src/arrow/python/decimal.cc
@@ -184,14 +184,15 @@ Status DecimalMetadata::Update(int32_t suggested_precision, int32_t suggested_sc
}

Status DecimalMetadata::Update(PyObject* object) {
DCHECK(PyDecimal_Check(object)) << "Object is not a Python Decimal";
bool is_decimal = PyDecimal_Check(object);
Member:

I don't think it's ok to do this in an optimized build. DecimalMetadata expects you to pass a decimal object. @cpcloud may confirm.

Member Author:

This isn't strictly necessary because I added a check before calling Update, but it does prevent a segfault if for some reason it's called with non-Decimal objects - which is not nice to get. If it hurts an optimization though, I can remove it.

Member:

Right now we are doing the check twice in optimized builds, which is not nice IMHO. DecimalMetadata::Update is a private API so it's up to the caller to provide appropriate input.

Member Author:

So you mean remove PyDecimal_Check altogether? This is only called when the type is not specified by the user, and then yes, it will end up doing two passes over the objects, checking both times whether they are decimal. It might be possible to do fewer checks on the second pass if we keep a list of which ones are decimal objects, but I'm not sure that would be worth it.

Member:

Fair enough, we can optimize later if we find it too slow. The conversion itself is very slow anyway :-)

DCHECK(is_decimal) << "Object is not a Python Decimal";

if (ARROW_PREDICT_FALSE(PyDecimal_ISNAN(object))) {
if (ARROW_PREDICT_FALSE(!is_decimal || PyDecimal_ISNAN(object))) {
return Status::OK();
}

int32_t precision;
int32_t scale;
int32_t precision = 0;
int32_t scale = 0;
RETURN_NOT_OK(InferDecimalPrecisionAndScale(object, &precision, &scale));
return Update(precision, scale);
}
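The precision and scale that Update feeds into the running maximum come from InferDecimalPrecisionAndScale. A rough pure-Python sketch of that inference, using `decimal.Decimal.as_tuple()` - the helper name and the exact handling of leading zeros are illustrative, not Arrow's actual C++ algorithm:

```python
from decimal import Decimal

def infer_precision_and_scale(value):
    # Illustrative sketch: scale is the number of fractional digits,
    # precision is at least the number of significant digits.
    _sign, digits, exponent = value.as_tuple()
    scale = -exponent if exponent < 0 else 0
    precision = max(len(digits), scale)
    return precision, scale

print(infer_precision_and_scale(Decimal('3.14')))  # (3, 2)
```

This matches the inferred type asserted in the new test below, where `Decimal('3.14')` round-trips as `decimal128(3, 2)`.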
25 changes: 12 additions & 13 deletions cpp/src/arrow/python/numpy_to_arrow.cc
@@ -743,7 +743,9 @@ Status NumPyConverter::ConvertDecimals() {

if (type_ == NULLPTR) {
for (PyObject* object : objects) {
RETURN_NOT_OK(max_decimal_metadata.Update(object));
if (!internal::PandasObjectIsNull(object)) {
Member:

Do we care about accepting other NULL-like objects such as float('nan')? Otherwise object != Py_None is a much faster check.

Member Author:

I'm not sure - is it possible to get NaNs from operations on Decimals? Or is that something the user might mix in somehow?

Contributor:

Python decimal objects can be NaN, unfortunately:

>>> import decimal
>>> decimal.Decimal('nan')
Decimal('NaN')

Member Author:

Seems like it could be NaN also:

In [5]: s1 = pd.Series([Decimal('1.0'), Decimal('2.0')])

In [6]: s2 = pd.Series([Decimal('2.0'), None])

In [7]: s1 / s2
Out[7]: 
0    0.5
1    NaN
dtype: object
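As the snippets above show, an object column can contain None, Decimal('NaN'), and float('nan') side by side, which is why a plain `object != Py_None` check isn't enough. A stdlib sketch of the null-like cases covered between PandasObjectIsNull and PyDecimal_ISNAN on the C++ side (`is_null_like` is a hypothetical helper name):

```python
from decimal import Decimal

def is_null_like(obj):
    # Hypothetical helper mirroring the None/NaN cases discussed above.
    if obj is None:
        return True
    if isinstance(obj, Decimal):
        return obj.is_nan()
    if isinstance(obj, float):
        return obj != obj  # NaN is the only float unequal to itself
    return False

print([is_null_like(x) for x in
       [Decimal('1.0'), Decimal('NaN'), None, float('nan')]])
# [False, True, True, True]
```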

RETURN_NOT_OK(max_decimal_metadata.Update(object));
}
}

type_ =
Member:

By the way, what happens here if all items are None? Do we have a test for that?

Member Author:

I'll add that

Member Author:

done

@@ -758,22 +760,19 @@ Status NumPyConverter::ConvertDecimals() {
for (PyObject* object : objects) {
const int is_decimal = PyObject_IsInstance(object, decimal_type_.obj());

if (ARROW_PREDICT_FALSE(is_decimal == 0)) {
if (is_decimal == 1) {
Decimal128 value;
RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, &value));
RETURN_NOT_OK(builder.Append(value));
} else if (is_decimal == 0 && internal::PandasObjectIsNull(object)) {
Member:

Same question as above: do we care about other NULL-like values than simply None?

RETURN_NOT_OK(builder.AppendNull());
} else {
// PyObject_IsInstance could error and set an exception
RETURN_IF_PYERROR();
std::stringstream ss;
ss << "Error converting from Python objects to Decimal: ";
RETURN_NOT_OK(InvalidConversion(object, "decimal.Decimal", &ss));
return Status::Invalid(ss.str());
} else if (ARROW_PREDICT_FALSE(is_decimal == -1)) {
DCHECK_NE(PyErr_Occurred(), nullptr);
RETURN_IF_PYERROR();
}

if (internal::PandasObjectIsNull(object)) {
RETURN_NOT_OK(builder.AppendNull());
} else {
Decimal128 value;
RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, &value));
RETURN_NOT_OK(builder.Append(value));
}
}
return PushBuilderResult(&builder);
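The restructured loop in ConvertDecimals first classifies each object and only then appends. Its branching can be sketched in Python (`classify` is an illustrative name, and `None` stands in for the broader PandasObjectIsNull check):

```python
from decimal import Decimal

def classify(obj):
    # Illustrative sketch of the per-object branching in ConvertDecimals.
    if isinstance(obj, Decimal):
        return 'append'       # DecimalFromPythonDecimal + builder.Append
    if obj is None:           # stand-in for internal::PandasObjectIsNull
        return 'append_null'  # builder.AppendNull
    # anything else is a conversion error (the PyObject_IsInstance == -1
    # case maps to propagating the pending Python exception)
    raise TypeError('Error converting from Python objects to Decimal: %r' % (obj,))

print([classify(x) for x in [Decimal('1.5'), None]])
# ['append', 'append_null']
```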
32 changes: 23 additions & 9 deletions python/pyarrow/tests/test_convert_pandas.py
@@ -80,9 +80,15 @@ def _check_pandas_roundtrip(df, expected=None, nthreads=1,
else False))


def _check_series_roundtrip(s, type_=None):
def _check_series_roundtrip(s, type_=None, expected_pa_type=None):
arr = pa.array(s, from_pandas=True, type=type_)

if type_ is not None and expected_pa_type is None:
expected_pa_type = type_

if expected_pa_type is not None:
assert arr.type == expected_pa_type

result = pd.Series(arr.to_pandas(), name=s.name)
if patypes.is_timestamp(arr.type) and arr.type.tz is not None:
result = (result.dt.tz_localize('utc')
@@ -1149,19 +1155,15 @@ def test_fixed_size_bytes_does_not_accept_varying_lengths(self):

def test_variable_size_bytes(self):
s = pd.Series([b'123', b'', b'a', None])
arr = pa.Array.from_pandas(s, type=pa.binary())
assert arr.type == pa.binary()
_check_series_roundtrip(s, type_=pa.binary())

def test_binary_from_bytearray(self):
s = pd.Series([bytearray(b'123'), bytearray(b''), bytearray(b'a')])
s = pd.Series([bytearray(b'123'), bytearray(b''), bytearray(b'a'),
None])
# Explicitly set type
arr = pa.Array.from_pandas(s, type=pa.binary())
assert arr.type == pa.binary()
# Infer type from bytearrays
arr = pa.Array.from_pandas(s)
assert arr.type == pa.binary()
_check_series_roundtrip(s, type_=pa.binary())
# Infer type from bytearrays
_check_series_roundtrip(s, expected_pa_type=pa.binary())

def test_table_empty_str(self):
values = ['', '', '', '', '']
@@ -1326,6 +1328,18 @@ def test_decimal_with_different_precisions(self):
expected = [decimal.Decimal('0.01000'), decimal.Decimal('0.00100')]
assert array.to_pylist() == expected

def test_decimal_with_None_explicit_type(self):
series = pd.Series([decimal.Decimal('3.14'), None])
_check_series_roundtrip(series, type_=pa.decimal128(12, 5))

# Test that having all None values still produces decimal array
series = pd.Series([None] * 2)
_check_series_roundtrip(series, type_=pa.decimal128(12, 5))

def test_decimal_with_None_infer_type(self):
series = pd.Series([decimal.Decimal('3.14'), None])
_check_series_roundtrip(series, expected_pa_type=pa.decimal128(3, 2))


class TestListTypes(object):
"""