Skip to content

Commit d3a266c

Browse files
committed
fix pre-1970 timestamp roundtrip
1 parent 39c865e commit d3a266c

File tree

2 files changed

+31
-11
lines changed

2 files changed

+31
-11
lines changed

cpp/src/parquet/arrow/arrow_reader_writer_test.cc

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <functional>
2929
#include <set>
3030
#include <sstream>
31+
#include <utility>
3132
#include <vector>
3233

3334
#include "arrow/array/builder_binary.h"
@@ -74,6 +75,7 @@
7475
#include "parquet/page_index.h"
7576
#include "parquet/properties.h"
7677
#include "parquet/test_util.h"
78+
#include "parquet/types.h"
7779

7880
using arrow::Array;
7981
using arrow::ArrayData;
@@ -4149,14 +4151,29 @@ INSTANTIATE_TEST_SUITE_P(Repetition_type, TestNestedSchemaRead,
41494151
::testing::Values(Repetition::REQUIRED, Repetition::OPTIONAL));
41504152

41514153
TEST(TestImpalaConversion, ArrowTimestampToImpalaTimestamp) {
4152-
// June 20, 2017 16:32:56 and 123456789 nanoseconds
4153-
int64_t nanoseconds = INT64_C(1497976376123456789);
4154+
std::vector<std::pair<int64_t, Int96>> test_cases = {
4155+
// June 20, 2017 16:32:56 and 123456789 nanoseconds
4156+
{INT64_C(1497976376123456789),
4157+
{{UINT32_C(632093973), UINT32_C(13871), UINT32_C(2457925)}}},
4158+
// January 1, 1970 00:00:00 and 000000000 nanoseconds
4159+
{INT64_C(0), {{UINT32_C(0), UINT32_C(0), UINT32_C(2440588)}}},
4160+
// December 31, 1969 23:59:59 and 999999000 nanoseconds
4161+
{INT64_C(-1000), {{UINT32_C(2437872664), UINT32_C(20116), UINT32_C(2440587)}}},
4162+
// December 31, 1969 00:00:00 and 000000000 nanoseconds
4163+
{INT64_C(-86400000000000), {{UINT32_C(0), UINT32_C(0), UINT32_C(2440587)}}},
4164+
// January 1, 1970 00:00:00 and 000001000 nanoseconds
4165+
{INT64_C(1000), {{UINT32_C(1000), UINT32_C(0), UINT32_C(2440588)}}},
4166+
// January 2, 1970 00:00:00 and 000000000 nanoseconds
4167+
{INT64_C(86400000000000), {{UINT32_C(0), UINT32_C(0), UINT32_C(2440589)}}},
4168+
};
41544169

4155-
Int96 calculated;
4170+
for (auto& [timestamp, impala_timestamp] : test_cases) {
4171+
ASSERT_EQ(timestamp, ::parquet::Int96GetNanoSeconds(impala_timestamp));
41564172

4157-
Int96 expected = {{UINT32_C(632093973), UINT32_C(13871), UINT32_C(2457925)}};
4158-
::parquet::internal::NanosecondsToImpalaTimestamp(nanoseconds, &calculated);
4159-
ASSERT_EQ(expected, calculated);
4173+
Int96 calculated;
4174+
::parquet::internal::NanosecondsToImpalaTimestamp(timestamp, &calculated);
4175+
ASSERT_EQ(impala_timestamp, calculated);
4176+
}
41604177
}
41614178

41624179
void TryReadDataFile(const std::string& path,

cpp/src/parquet/column_writer.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -259,14 +259,17 @@ constexpr int64_t kJulianEpochOffsetDays = INT64_C(2440588);
259259

260260
template <int64_t UnitPerDay, int64_t NanosecondsPerUnit>
261261
inline void ArrowTimestampToImpalaTimestamp(const int64_t time, Int96* impala_timestamp) {
262-
int64_t julian_days = (time / UnitPerDay) + kJulianEpochOffsetDays;
263-
(*impala_timestamp).value[2] = (uint32_t)julian_days;
264-
262+
auto julian_days = static_cast<int32_t>(time / UnitPerDay + kJulianEpochOffsetDays);
265263
int64_t last_day_units = time % UnitPerDay;
266-
auto last_day_nanos = last_day_units * NanosecondsPerUnit;
264+
if (last_day_units < 0) {
265+
--julian_days;
266+
last_day_units += UnitPerDay;
267+
}
268+
impala_timestamp->value[2] = static_cast<uint32_t>(julian_days);
269+
uint64_t last_day_nanos = static_cast<uint64_t>(last_day_units) * NanosecondsPerUnit;
267270
// impala_timestamp will be unaligned every other entry so do memcpy instead
268271
// of assign and reinterpret cast to avoid undefined behavior.
269-
std::memcpy(impala_timestamp, &last_day_nanos, sizeof(int64_t));
272+
std::memcpy(impala_timestamp, &last_day_nanos, sizeof(uint64_t));
270273
}
271274

272275
constexpr int64_t kSecondsInNanos = INT64_C(1000000000);

0 commit comments

Comments
 (0)