Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cpp/src/arrow/ipc/json_integration_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ static const char* JSON_EXAMPLE = R"example(
"fields": [
{
"name": "foo",
"type": {"name": "int", "isSigned": true, "bitWidth": 32},
"type": {"name": "int", "isSigned": true, "bitWidth": 64},
"nullable": true, "children": []
},
{
Expand All @@ -272,7 +272,7 @@ static const char* JSON_EXAMPLE = R"example(
{
"name": "foo",
"count": 5,
"DATA": [1, 2, 3, 4, 5],
"DATA": ["1", "2", "3", "4", "5"],
"VALIDITY": [1, 0, 1, 1, 1]
},
{
Expand All @@ -289,7 +289,7 @@ static const char* JSON_EXAMPLE = R"example(
{
"name": "foo",
"count": 4,
"DATA": [1, 2, 3, 4],
"DATA": ["-1", "0", "9223372036854775807", "-9223372036854775808"],
"VALIDITY": [1, 0, 1, 1]
},
{
Expand Down
63 changes: 39 additions & 24 deletions cpp/src/arrow/ipc/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
#include <type_traits>
Expand Down Expand Up @@ -440,29 +441,37 @@ class ArrayWriter {
return Status::OK();
}

template <typename T, bool IsSigned = std::is_signed<typename T::value_type>::value>
void WriteIntegerDataValues(const T& arr) {
static const char null_string[] = "0";
const auto data = arr.raw_values();
template <typename ArrayType, typename TypeClass = typename ArrayType::TypeClass,
typename CType = typename TypeClass::c_type>
enable_if_t<is_physical_integer_type<TypeClass>::value &&
sizeof(CType) != sizeof(int64_t)>
WriteDataValues(const ArrayType& arr) {
static const std::string null_string = "0";
for (int64_t i = 0; i < arr.length(); ++i) {
if (arr.IsValid(i)) {
IsSigned ? writer_->Int64(data[i]) : writer_->Uint64(data[i]);
writer_->Int64(arr.Value(i));
} else {
writer_->RawNumber(null_string, sizeof(null_string));
writer_->RawNumber(null_string.data(), null_string.size());
}
}
}

template <typename ArrayType>
enable_if_physical_signed_integer<typename ArrayType::TypeClass> WriteDataValues(
const ArrayType& arr) {
WriteIntegerDataValues<ArrayType>(arr);
}
template <typename ArrayType, typename TypeClass = typename ArrayType::TypeClass,
typename CType = typename TypeClass::c_type>
enable_if_t<is_physical_integer_type<TypeClass>::value &&
sizeof(CType) == sizeof(int64_t)>
WriteDataValues(const ArrayType& arr) {
::arrow::internal::StringFormatter<typename CTypeTraits<CType>::ArrowType> fmt;

template <typename ArrayType>
enable_if_physical_unsigned_integer<typename ArrayType::TypeClass> WriteDataValues(
const ArrayType& arr) {
WriteIntegerDataValues<ArrayType>(arr);
static const std::string null_string = "0";
for (int64_t i = 0; i < arr.length(); ++i) {
if (arr.IsValid(i)) {
fmt(arr.Value(i),
[&](util::string_view repr) { writer_->String(repr.data(), repr.size()); });
} else {
writer_->String(null_string.data(), null_string.size());
}
}
}

template <typename ArrayType>
Expand Down Expand Up @@ -1154,18 +1163,24 @@ enable_if_boolean<T, bool> UnboxValue(const rj::Value& val) {
return val.GetBool();
}

template <typename T>
enable_if_physical_signed_integer<T, typename T::c_type> UnboxValue(
const rj::Value& val) {
template <typename T, typename CType = typename T::c_type>
enable_if_t<is_physical_integer_type<T>::value && sizeof(CType) != sizeof(int64_t), CType>
UnboxValue(const rj::Value& val) {
DCHECK(val.IsInt64());
return static_cast<typename T::c_type>(val.GetInt64());
return static_cast<CType>(val.GetInt64());
}

template <typename T>
enable_if_physical_unsigned_integer<T, typename T::c_type> UnboxValue(
const rj::Value& val) {
DCHECK(val.IsUint());
return static_cast<typename T::c_type>(val.GetUint64());
template <typename T, typename CType = typename T::c_type>
enable_if_t<is_physical_integer_type<T>::value && sizeof(CType) == sizeof(int64_t), CType>
UnboxValue(const rj::Value& val) {
DCHECK(val.IsString());

CType out;
bool success = ::arrow::internal::ParseValue<typename CTypeTraits<CType>::ArrowType>(
val.GetString(), val.GetStringLength(), &out);

DCHECK(success);
return out;
}

template <typename T>
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/type_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,14 @@ template <typename T, typename R = void>
using enable_if_physical_unsigned_integer =
enable_if_t<is_physical_unsigned_integer_type<T>::value, R>;

template <typename T>
using is_physical_integer_type =
std::integral_constant<bool, is_physical_unsigned_integer_type<T>::value ||
is_physical_signed_integer_type<T>::value>;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure 100%. but, is this indentation correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

C++ indentation is deferred to clang-format. We have a CI job which ensures that running clang-format is a no-op. As for explaining why it made this choice... I'm not familiar enough with its implementation to say, sorry


template <typename T, typename R = void>
using enable_if_physical_integer = enable_if_t<is_physical_integer_type<T>::value, R>;

// Like is_floating_type but excluding half-floats which don't have a
// float-like c type.
template <typename T>
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/arrow/util/formatting.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ namespace internal {
template <typename ARROW_TYPE, typename Enable = void>
class StringFormatter;

template <typename Appender>
using Return = decltype(std::declval<Appender>()(util::string_view{}));

template <>
class StringFormatter<BooleanType> {
public:
Expand All @@ -49,7 +52,7 @@ class StringFormatter<BooleanType> {
using value_type = bool;

template <typename Appender>
Status operator()(bool value, Appender&& append) {
Return<Appender> operator()(bool value, Appender&& append) {
if (value) {
const char string[] = "true";
return append(util::string_view(string));
Expand Down Expand Up @@ -95,7 +98,7 @@ class IntToStringFormatterMixin {
(is_signed ? 2 : 1) + std::numeric_limits<value_type>::digits10;

template <typename Appender>
Status operator()(value_type value, Appender&& append) {
Return<Appender> operator()(value_type value, Appender&& append) {
char buffer[buffer_size];
char* ptr = buffer + buffer_size;
int32_t size = 0;
Expand Down Expand Up @@ -206,7 +209,7 @@ class FloatToStringFormatterMixin : public FloatToStringFormatter {
explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}

template <typename Appender>
Status operator()(value_type value, Appender&& append) {
Return<Appender> operator()(value_type value, Appender&& append) {
char buffer[buffer_size];
int size = FormatFloat(value, buffer, buffer_size);
return append(util::string_view(buffer, size));
Expand Down
19 changes: 12 additions & 7 deletions dev/archery/archery/integration/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,15 @@ def _get_type(self):

def generate_column(self, size, name=None):
lower_bound, upper_bound = self._get_generated_data_bounds()
return self.generate_range(size, lower_bound, upper_bound, name=name)
return self.generate_range(size, lower_bound, upper_bound,
name=name, include_extremes=True)

def generate_range(self, size, lower, upper, name=None):
values = [int(x) for x in
np.random.randint(lower, upper, size=size)]
def generate_range(self, size, lower, upper, name=None,
include_extremes=False):
values = np.random.randint(lower, upper, size=size)
if include_extremes and size >= 2:
values[:2] = [lower, upper]
values = list(map(int if self.bit_width < 64 else str, values))

is_valid = self._make_is_valid(size)

Expand Down Expand Up @@ -1540,9 +1544,10 @@ def _temp_path():
.skip_category('Java') # TODO(ARROW-7779)
.skip_category('JS'),

generate_extension_case().skip_category('Go')
.skip_category('JS')
.skip_category('Rust'),
generate_extension_case()
.skip_category('Go')
.skip_category('JS')
.skip_category('Rust'),
]

if flight:
Expand Down
3 changes: 2 additions & 1 deletion dev/archery/archery/integration/tester_go.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ class GoTester(Tester):
CONSUMER = True

# FIXME(sbinet): revisit for Go modules
GOPATH = os.getenv('GOPATH', '~/go')
HOME = os.getenv('HOME', '~')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the change, just set GOPATH if you have something else?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why ~ failed resolve to $HOME locally but this seemed like a way to save someone the next person from the frustration of reading the script to figure out that GOPATH needs set.

GOPATH = os.getenv('GOPATH', os.path.join(HOME, 'go'))
GOBIN = os.environ.get('GOBIN', os.path.join(GOPATH, 'bin'))

GO_INTEGRATION_EXE = os.path.join(GOBIN, 'arrow-json-integration-test')
Expand Down
52 changes: 38 additions & 14 deletions go/arrow/internal/arrjson/arrjson.go
Original file line number Diff line number Diff line change
Expand Up @@ -963,19 +963,23 @@ func i32ToJSON(arr *array.Int32) []interface{} {
func i64FromJSON(vs []interface{}) []int64 {
o := make([]int64, len(vs))
for i, v := range vs {
vv, err := v.(json.Number).Int64()
vv, err := strconv.ParseInt(v.(string), 10, 64)
if err != nil {
panic(err)
}
o[i] = int64(vv)
o[i] = vv
}
return o
}

func i64ToJSON(arr *array.Int64) []interface{} {
o := make([]interface{}, arr.Len())
for i := range o {
o[i] = arr.Value(i)
if arr.IsValid(i) {
o[i] = strconv.FormatInt(arr.Value(i), 10)
} else {
o[i] = "0"
}
}
return o
}
Expand Down Expand Up @@ -1043,19 +1047,23 @@ func u32ToJSON(arr *array.Uint32) []interface{} {
func u64FromJSON(vs []interface{}) []uint64 {
o := make([]uint64, len(vs))
for i, v := range vs {
vv, err := strconv.ParseUint(v.(json.Number).String(), 10, 64)
vv, err := strconv.ParseUint(v.(string), 10, 64)
if err != nil {
panic(err)
}
o[i] = uint64(vv)
o[i] = vv
}
return o
}

func u64ToJSON(arr *array.Uint64) []interface{} {
o := make([]interface{}, arr.Len())
for i := range o {
o[i] = arr.Value(i)
if arr.IsValid(i) {
o[i] = strconv.FormatUint(arr.Value(i), 10)
} else {
o[i] = "0"
}
}
return o
}
Expand Down Expand Up @@ -1193,7 +1201,7 @@ func date32ToJSON(arr *array.Date32) []interface{} {
func date64FromJSON(vs []interface{}) []arrow.Date64 {
o := make([]arrow.Date64, len(vs))
for i, v := range vs {
vv, err := v.(json.Number).Int64()
vv, err := strconv.ParseInt(v.(string), 10, 64)
if err != nil {
panic(err)
}
Expand All @@ -1205,7 +1213,11 @@ func date64FromJSON(vs []interface{}) []arrow.Date64 {
func date64ToJSON(arr *array.Date64) []interface{} {
o := make([]interface{}, arr.Len())
for i := range o {
o[i] = int64(arr.Value(i))
if arr.IsValid(i) {
o[i] = strconv.FormatInt(int64(arr.Value(i)), 10)
} else {
o[i] = "0"
}
}
return o
}
Expand Down Expand Up @@ -1233,7 +1245,7 @@ func time32ToJSON(arr *array.Time32) []interface{} {
func time64FromJSON(vs []interface{}) []arrow.Time64 {
o := make([]arrow.Time64, len(vs))
for i, v := range vs {
vv, err := v.(json.Number).Int64()
vv, err := strconv.ParseInt(v.(string), 10, 64)
if err != nil {
panic(err)
}
Expand All @@ -1245,15 +1257,19 @@ func time64FromJSON(vs []interface{}) []arrow.Time64 {
func time64ToJSON(arr *array.Time64) []interface{} {
o := make([]interface{}, arr.Len())
for i := range o {
o[i] = int64(arr.Value(i))
if arr.IsValid(i) {
o[i] = strconv.FormatInt(int64(arr.Value(i)), 10)
} else {
o[i] = "0"
}
}
return o
}

func timestampFromJSON(vs []interface{}) []arrow.Timestamp {
o := make([]arrow.Timestamp, len(vs))
for i, v := range vs {
vv, err := v.(json.Number).Int64()
vv, err := strconv.ParseInt(v.(string), 10, 64)
if err != nil {
panic(err)
}
Expand All @@ -1265,7 +1281,11 @@ func timestampFromJSON(vs []interface{}) []arrow.Timestamp {
func timestampToJSON(arr *array.Timestamp) []interface{} {
o := make([]interface{}, arr.Len())
for i := range o {
o[i] = int64(arr.Value(i))
if arr.IsValid(i) {
o[i] = strconv.FormatInt(int64(arr.Value(i)), 10)
} else {
o[i] = "0"
}
}
return o
}
Expand Down Expand Up @@ -1318,7 +1338,7 @@ func daytimeintervalToJSON(arr *array.DayTimeInterval) []interface{} {
func durationFromJSON(vs []interface{}) []arrow.Duration {
o := make([]arrow.Duration, len(vs))
for i, v := range vs {
vv, err := v.(json.Number).Int64()
vv, err := strconv.ParseInt(v.(string), 10, 64)
if err != nil {
panic(err)
}
Expand All @@ -1330,7 +1350,11 @@ func durationFromJSON(vs []interface{}) []arrow.Duration {
func durationToJSON(arr *array.Duration) []interface{} {
o := make([]interface{}, arr.Len())
for i := range o {
o[i] = arr.Value(i)
if arr.IsValid(i) {
o[i] = strconv.FormatInt(int64(arr.Value(i)), 10)
} else {
o[i] = "0"
}
}
return o
}
Expand Down