Skip to content

Commit bfc5217

Browse files
committed
fix review and add more tests
1 parent 0cf4fe7 commit bfc5217

File tree

2 files changed

+27
-21
lines changed

2 files changed

+27
-21
lines changed

cpp/src/parquet/arrow/arrow_reader_writer_test.cc

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <functional>
2929
#include <set>
3030
#include <sstream>
31+
#include <utility>
3132
#include <vector>
3233

3334
#include "arrow/array/builder_binary.h"
@@ -376,19 +377,19 @@ const double test_traits<::arrow::DoubleType>::value(4.2);
376377
template <>
377378
struct test_traits<::arrow::StringType> {
378379
static constexpr ParquetType::type parquet_enum = ParquetType::BYTE_ARRAY;
379-
static std::string const value;
380+
static const std::string value;
380381
};
381382

382383
template <>
383384
struct test_traits<::arrow::BinaryType> {
384385
static constexpr ParquetType::type parquet_enum = ParquetType::BYTE_ARRAY;
385-
static std::string const value;
386+
static const std::string value;
386387
};
387388

388389
template <>
389390
struct test_traits<::arrow::FixedSizeBinaryType> {
390391
static constexpr ParquetType::type parquet_enum = ParquetType::FIXED_LEN_BYTE_ARRAY;
391-
static std::string const value;
392+
static const std::string value;
392393
};
393394

394395
const std::string test_traits<::arrow::StringType>::value("Test"); // NOLINT
@@ -4150,20 +4151,23 @@ INSTANTIATE_TEST_SUITE_P(Repetition_type, TestNestedSchemaRead,
41504151
::testing::Values(Repetition::REQUIRED, Repetition::OPTIONAL));
41514152

41524153
TEST(TestImpalaConversion, ArrowTimestampToImpalaTimestamp) {
4153-
{
4154-
// June 20, 2017 16:32:56 and 123456789 nanoseconds
4155-
int64_t timestamp = INT64_C(1497976376123456789);
4156-
Int96 impala_timestamp = {{UINT32_C(632093973), UINT32_C(13871), UINT32_C(2457925)}};
4157-
ASSERT_EQ(timestamp, ::parquet::Int96GetNanoSeconds(impala_timestamp));
4154+
std::vector<std::pair<int64_t, Int96>> test_cases = {
4155+
// June 20, 2017 16:32:56 and 123456789 nanoseconds
4156+
{INT64_C(1497976376123456789),
4157+
{{UINT32_C(632093973), UINT32_C(13871), UINT32_C(2457925)}}},
4158+
// January 1, 1970 00:00:00 and 000000000 nanoseconds
4159+
{INT64_C(0), {{UINT32_C(0), UINT32_C(0), UINT32_C(2440588)}}},
4160+
// December 31, 1969 23:59:59 and 999999000 nanoseconds
4161+
{INT64_C(-1000), {{UINT32_C(2437872664), UINT32_C(20116), UINT32_C(2440587)}}},
4162+
// December 31, 1969 00:00:00 and 000000000 nanoseconds
4163+
{INT64_C(-86400000000000), {{UINT32_C(0), UINT32_C(0), UINT32_C(2440587)}}},
4164+
// January 1, 1970 00:00:00 and 000001000 nanoseconds
4165+
{INT64_C(1000), {{UINT32_C(1000), UINT32_C(0), UINT32_C(2440588)}}},
4166+
// January 2, 1970 00:00:00 and 000000000 nanoseconds
4167+
{INT64_C(86400000000000), {{UINT32_C(0), UINT32_C(0), UINT32_C(2440589)}}},
4168+
};
41584169

4159-
Int96 calculated;
4160-
::parquet::internal::NanosecondsToImpalaTimestamp(timestamp, &calculated);
4161-
ASSERT_EQ(impala_timestamp, calculated);
4162-
}
4163-
{
4164-
// January 1, 1970 07:59:59 and 999999000 nanoseconds
4165-
int64_t timestamp = INT64_C(-1000);
4166-
Int96 impala_timestamp = {{UINT32_C(2437872664), UINT32_C(20116), UINT32_C(2440587)}};
4170+
for (auto& [timestamp, impala_timestamp] : test_cases) {
41674171
ASSERT_EQ(timestamp, ::parquet::Int96GetNanoSeconds(impala_timestamp));
41684172

41694173
Int96 calculated;

cpp/src/parquet/column_writer.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -259,12 +259,14 @@ constexpr int64_t kJulianEpochOffsetDays = INT64_C(2440588);
259259

260260
template <int64_t UnitPerDay, int64_t NanosecondsPerUnit>
261261
inline void ArrowTimestampToImpalaTimestamp(const int64_t time, Int96* impala_timestamp) {
262-
int32_t julian_days = static_cast<int32_t>(time / UnitPerDay + kJulianEpochOffsetDays) +
263-
(time < 0 ? -1 : 0);
262+
auto julian_days = static_cast<int32_t>(time / UnitPerDay + kJulianEpochOffsetDays);
263+
int64_t last_day_units = time % UnitPerDay;
264+
if (last_day_units < 0) {
265+
--julian_days;
266+
last_day_units += UnitPerDay;
267+
}
264268
impala_timestamp->value[2] = static_cast<uint32_t>(julian_days);
265-
266-
uint64_t last_day_units = time % UnitPerDay + (time < 0 ? UnitPerDay : 0);
267-
uint64_t last_day_nanos = last_day_units * NanosecondsPerUnit;
269+
uint64_t last_day_nanos = static_cast<uint64_t>(last_day_units) * NanosecondsPerUnit;
268270
// impala_timestamp will be unaligned every other entry so do memcpy instead
269271
// of assign and reinterpret cast to avoid undefined behavior.
270272
std::memcpy(impala_timestamp, &last_day_nanos, sizeof(uint64_t));

0 commit comments

Comments
 (0)