diff --git a/cpp/src/arrow/ipc/json_integration_test.cc b/cpp/src/arrow/ipc/json_integration_test.cc index 90864b7a260..9352fdefc17 100644 --- a/cpp/src/arrow/ipc/json_integration_test.cc +++ b/cpp/src/arrow/ipc/json_integration_test.cc @@ -255,7 +255,7 @@ static const char* JSON_EXAMPLE = R"example( "fields": [ { "name": "foo", - "type": {"name": "int", "isSigned": true, "bitWidth": 32}, + "type": {"name": "int", "isSigned": true, "bitWidth": 64}, "nullable": true, "children": [] }, { @@ -272,7 +272,7 @@ static const char* JSON_EXAMPLE = R"example( { "name": "foo", "count": 5, - "DATA": [1, 2, 3, 4, 5], + "DATA": ["1", "2", "3", "4", "5"], "VALIDITY": [1, 0, 1, 1, 1] }, { @@ -289,7 +289,7 @@ static const char* JSON_EXAMPLE = R"example( { "name": "foo", "count": 4, - "DATA": [1, 2, 3, 4], + "DATA": ["-1", "0", "9223372036854775807", "-9223372036854775808"], "VALIDITY": [1, 0, 1, 1] }, { diff --git a/cpp/src/arrow/ipc/json_internal.cc b/cpp/src/arrow/ipc/json_internal.cc index e04ead70b81..898977b02c9 100644 --- a/cpp/src/arrow/ipc/json_internal.cc +++ b/cpp/src/arrow/ipc/json_internal.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -440,29 +441,37 @@ class ArrayWriter { return Status::OK(); } - template ::value> - void WriteIntegerDataValues(const T& arr) { - static const char null_string[] = "0"; - const auto data = arr.raw_values(); + template + enable_if_t::value && + sizeof(CType) != sizeof(int64_t)> + WriteDataValues(const ArrayType& arr) { + static const std::string null_string = "0"; for (int64_t i = 0; i < arr.length(); ++i) { if (arr.IsValid(i)) { - IsSigned ? writer_->Int64(data[i]) : writer_->Uint64(data[i]); + writer_->Int64(arr.Value(i)); } else { - writer_->RawNumber(null_string, sizeof(null_string)); + writer_->RawNumber(null_string.data(), null_string.size()); } } } - template - enable_if_physical_signed_integer WriteDataValues( - const ArrayType& arr) { - WriteIntegerDataValues(arr); - } + template + enable_if_t::value && + sizeof(CType) == sizeof(int64_t)> + WriteDataValues(const ArrayType& arr) { + ::arrow::internal::StringFormatter::ArrowType> fmt; - template - enable_if_physical_unsigned_integer WriteDataValues( - const ArrayType& arr) { - WriteIntegerDataValues(arr); + static const std::string null_string = "0"; + for (int64_t i = 0; i < arr.length(); ++i) { + if (arr.IsValid(i)) { + fmt(arr.Value(i), + [&](util::string_view repr) { writer_->String(repr.data(), repr.size()); }); + } else { + writer_->String(null_string.data(), null_string.size()); + } + } } template @@ -1154,18 +1163,24 @@ enable_if_boolean UnboxValue(const rj::Value& val) { return val.GetBool(); } -template -enable_if_physical_signed_integer UnboxValue( - const rj::Value& val) { +template +enable_if_t::value && sizeof(CType) != sizeof(int64_t), CType> +UnboxValue(const rj::Value& val) { DCHECK(val.IsInt64()); - return static_cast(val.GetInt64()); + return static_cast(val.GetInt64()); } -template -enable_if_physical_unsigned_integer UnboxValue( - const rj::Value& val) { - DCHECK(val.IsUint()); - return static_cast(val.GetUint64()); +template +enable_if_t::value && sizeof(CType) == sizeof(int64_t), CType> +UnboxValue(const rj::Value& val) { + DCHECK(val.IsString()); + + CType out; + bool success = ::arrow::internal::ParseValue::ArrowType>( + val.GetString(), val.GetStringLength(), &out); + + DCHECK(success); + return out; } template diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index f61b690ac9d..937443b3012 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -646,6 +646,14 @@ template using enable_if_physical_unsigned_integer = enable_if_t::value, R>; +template +using is_physical_integer_type = + std::integral_constant::value || + is_physical_signed_integer_type::value>; + +template +using enable_if_physical_integer = enable_if_t::value, R>; + // Like is_floating_type but excluding half-floats which don't have a // float-like c type. template diff --git a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h index 7deecc00230..bc1b10fafe6 100644 --- a/cpp/src/arrow/util/formatting.h +++ b/cpp/src/arrow/util/formatting.h @@ -41,6 +41,9 @@ namespace internal { template class StringFormatter; +template +using Return = decltype(std::declval()(util::string_view{})); + template <> class StringFormatter { public: @@ -49,7 +52,7 @@ class StringFormatter { using value_type = bool; template - Status operator()(bool value, Appender&& append) { + Return operator()(bool value, Appender&& append) { if (value) { const char string[] = "true"; return append(util::string_view(string)); @@ -95,7 +98,7 @@ class IntToStringFormatterMixin { (is_signed ? 2 : 1) + std::numeric_limits::digits10; template - Status operator()(value_type value, Appender&& append) { + Return operator()(value_type value, Appender&& append) { char buffer[buffer_size]; char* ptr = buffer + buffer_size; int32_t size = 0; @@ -206,7 +209,7 @@ class FloatToStringFormatterMixin : public FloatToStringFormatter { explicit FloatToStringFormatterMixin(const std::shared_ptr& = NULLPTR) {} template - Status operator()(value_type value, Appender&& append) { + Return operator()(value_type value, Appender&& append) { char buffer[buffer_size]; int size = FormatFloat(value, buffer, buffer_size); return append(util::string_view(buffer, size)); diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 0b699e8ac52..51543683c43 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -176,11 +176,15 @@ def _get_type(self): def generate_column(self, size, name=None): lower_bound, upper_bound = self._get_generated_data_bounds() - return self.generate_range(size, lower_bound, upper_bound, name=name) + return self.generate_range(size, lower_bound, upper_bound, + name=name, include_extremes=True) - def generate_range(self, size, lower, upper, name=None): - values = [int(x) for x in - np.random.randint(lower, upper, size=size)] + def generate_range(self, size, lower, upper, name=None, + include_extremes=False): + values = np.random.randint(lower, upper, size=size) + if include_extremes and size >= 2: + values[:2] = [lower, upper] + values = list(map(int if self.bit_width < 64 else str, values)) is_valid = self._make_is_valid(size) @@ -1540,9 +1544,10 @@ def _temp_path(): .skip_category('Java') # TODO(ARROW-7779) .skip_category('JS'), - generate_extension_case().skip_category('Go') - .skip_category('JS') - .skip_category('Rust'), + generate_extension_case() + .skip_category('Go') + .skip_category('JS') + .skip_category('Rust'), ] if flight: diff --git a/dev/archery/archery/integration/tester_go.py b/dev/archery/archery/integration/tester_go.py index 5dda81dfc2e..ea799c5a1bd 100644 --- a/dev/archery/archery/integration/tester_go.py +++ b/dev/archery/archery/integration/tester_go.py @@ -26,7 +26,8 @@ class GoTester(Tester): CONSUMER = True # FIXME(sbinet): revisit for Go modules - GOPATH = os.getenv('GOPATH', '~/go') + HOME = os.getenv('HOME', '~') + GOPATH = os.getenv('GOPATH', os.path.join(HOME, 'go')) GOBIN = os.environ.get('GOBIN', os.path.join(GOPATH, 'bin')) GO_INTEGRATION_EXE = os.path.join(GOBIN, 'arrow-json-integration-test') diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index 3b4e4ab1d0e..a9f44859a76 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -963,11 +963,11 @@ func i32ToJSON(arr *array.Int32) []interface{} { func i64FromJSON(vs []interface{}) []int64 { o := make([]int64, len(vs)) for i, v := range vs { - vv, err := v.(json.Number).Int64() + vv, err := strconv.ParseInt(v.(string), 10, 64) if err != nil { panic(err) } - o[i] = int64(vv) + o[i] = vv } return o } @@ -975,7 +975,11 @@ func i64FromJSON(vs []interface{}) []int64 { func i64ToJSON(arr *array.Int64) []interface{} { o := make([]interface{}, arr.Len()) for i := range o { - o[i] = arr.Value(i) + if arr.IsValid(i) { + o[i] = strconv.FormatInt(arr.Value(i), 10) + } else { + o[i] = "0" + } } return o } @@ -1043,11 +1047,11 @@ func u32ToJSON(arr *array.Uint32) []interface{} { func u64FromJSON(vs []interface{}) []uint64 { o := make([]uint64, len(vs)) for i, v := range vs { - vv, err := strconv.ParseUint(v.(json.Number).String(), 10, 64) + vv, err := strconv.ParseUint(v.(string), 10, 64) if err != nil { panic(err) } - o[i] = uint64(vv) + o[i] = vv } return o } @@ -1055,7 +1059,11 @@ func u64FromJSON(vs []interface{}) []uint64 { func u64ToJSON(arr *array.Uint64) []interface{} { o := make([]interface{}, arr.Len()) for i := range o { - o[i] = arr.Value(i) + if arr.IsValid(i) { + o[i] = strconv.FormatUint(arr.Value(i), 10) + } else { + o[i] = "0" + } } return o } @@ -1193,7 +1201,7 @@ func date32ToJSON(arr *array.Date32) []interface{} { func date64FromJSON(vs []interface{}) []arrow.Date64 { o := make([]arrow.Date64, len(vs)) for i, v := range vs { - vv, err := v.(json.Number).Int64() + vv, err := strconv.ParseInt(v.(string), 10, 64) if err != nil { panic(err) } @@ -1205,7 +1213,11 @@ func date64FromJSON(vs []interface{}) []arrow.Date64 { func date64ToJSON(arr *array.Date64) []interface{} { o := make([]interface{}, arr.Len()) for i := range o { - o[i] = int64(arr.Value(i)) + if arr.IsValid(i) { + o[i] = strconv.FormatInt(int64(arr.Value(i)), 10) + } else { + o[i] = "0" + } } return o } @@ -1233,7 +1245,7 @@ func time32ToJSON(arr *array.Time32) []interface{} { func time64FromJSON(vs []interface{}) []arrow.Time64 { o := make([]arrow.Time64, len(vs)) for i, v := range vs { - vv, err := v.(json.Number).Int64() + vv, err := strconv.ParseInt(v.(string), 10, 64) if err != nil { panic(err) } @@ -1245,7 +1257,11 @@ func time64FromJSON(vs []interface{}) []arrow.Time64 { func time64ToJSON(arr *array.Time64) []interface{} { o := make([]interface{}, arr.Len()) for i := range o { - o[i] = int64(arr.Value(i)) + if arr.IsValid(i) { + o[i] = strconv.FormatInt(int64(arr.Value(i)), 10) + } else { + o[i] = "0" + } } return o } @@ -1253,7 +1269,7 @@ func time64ToJSON(arr *array.Time64) []interface{} { func timestampFromJSON(vs []interface{}) []arrow.Timestamp { o := make([]arrow.Timestamp, len(vs)) for i, v := range vs { - vv, err := v.(json.Number).Int64() + vv, err := strconv.ParseInt(v.(string), 10, 64) if err != nil { panic(err) } @@ -1265,7 +1281,11 @@ func timestampFromJSON(vs []interface{}) []arrow.Timestamp { func timestampToJSON(arr *array.Timestamp) []interface{} { o := make([]interface{}, arr.Len()) for i := range o { - o[i] = int64(arr.Value(i)) + if arr.IsValid(i) { + o[i] = strconv.FormatInt(int64(arr.Value(i)), 10) + } else { + o[i] = "0" + } } return o } @@ -1318,7 +1338,7 @@ func daytimeintervalToJSON(arr *array.DayTimeInterval) []interface{} { func durationFromJSON(vs []interface{}) []arrow.Duration { o := make([]arrow.Duration, len(vs)) for i, v := range vs { - vv, err := v.(json.Number).Int64() + vv, err := strconv.ParseInt(v.(string), 10, 64) if err != nil { panic(err) } @@ -1330,7 +1350,11 @@ func durationFromJSON(vs []interface{}) []arrow.Duration { func durationToJSON(arr *array.Duration) []interface{} { o := make([]interface{}, arr.Len()) for i := range o { - o[i] = arr.Value(i) + if arr.IsValid(i) { + o[i] = strconv.FormatInt(int64(arr.Value(i)), 10) + } else { + o[i] = "0" + } } return o }