Skip to content

Commit

Permalink
apacheGH-36026: [C++][ORC] Check TZDB availability for ORC
Browse files Browse the repository at this point in the history
  • Loading branch information
wgtmac committed Mar 21, 2024
1 parent 14027c3 commit 0c640ac
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 7 deletions.
26 changes: 19 additions & 7 deletions cpp/src/arrow/adapters/orc/adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,15 @@
#include "arrow/adapters/orc/adapter.h"

#include <algorithm>
#include <cstdint>
#include <functional>
#include <cstdlib>
#include <filesystem>
#include <list>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "arrow/adapters/orc/util.h"
#include "arrow/buffer.h"
#include "arrow/builder.h"
#include "arrow/io/interfaces.h"
#include "arrow/memory_pool.h"
Expand All @@ -37,14 +35,11 @@
#include "arrow/table.h"
#include "arrow/table_builder.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
#include "arrow/util/key_value_metadata.h"
#include "arrow/util/macros.h"
#include "arrow/util/range.h"
#include "arrow/util/visibility.h"
#include "orc/Exceptions.hh"

// alias to not interfere with nested orc namespace
Expand Down Expand Up @@ -183,6 +178,21 @@ liborc::RowReaderOptions default_row_reader_options() {
return options;
}

// Checks that a timezone database directory can be found on this machine.
//
// The directory named by the TZDIR environment variable is probed when that variable
// is set; otherwise the default location /usr/share/zoneinfo is probed.
//
// Returns Status::OK() when the probed path exists, and Status::Invalid when it does
// not exist or when its existence cannot be determined.
//
// Remove this check once https://issues.apache.org/jira/browse/ORC-1661 is fixed.
Status check_timezone_database_availability() {
  const char* tz_dir = std::getenv("TZDIR");
  // Use the non-throwing overload: this function reports failures through Status, so
  // a filesystem error (e.g. permission denied on a parent directory) must not escape
  // as an exception. On error `exists` returns false, which is reported as unavailable.
  std::error_code ec;
  const bool is_tzdb_available =
      std::filesystem::exists(tz_dir != nullptr ? tz_dir : "/usr/share/zoneinfo", ec);
  if (!is_tzdb_available) {
    return Status::Invalid(
        "IANA timezone database is unavailable but required by ORC."
        " Please install it to /usr/share/zoneinfo or set TZDIR env to the installed"
        " directory");
  }
  return Status::OK();
}

} // namespace

class ORCFileReader::Impl {
Expand Down Expand Up @@ -541,6 +551,7 @@ ORCFileReader::~ORCFileReader() {}

Result<std::unique_ptr<ORCFileReader>> ORCFileReader::Open(
const std::shared_ptr<io::RandomAccessFile>& file, MemoryPool* pool) {
RETURN_NOT_OK(check_timezone_database_availability());
auto result = std::unique_ptr<ORCFileReader>(new ORCFileReader());
RETURN_NOT_OK(result->impl_->Open(file, pool));
return std::move(result);
Expand Down Expand Up @@ -807,6 +818,7 @@ ORCFileWriter::ORCFileWriter() { impl_.reset(new ORCFileWriter::Impl()); }

Result<std::unique_ptr<ORCFileWriter>> ORCFileWriter::Open(
io::OutputStream* output_stream, const WriteOptions& writer_options) {
RETURN_NOT_OK(check_timezone_database_availability());
std::unique_ptr<ORCFileWriter> result =
std::unique_ptr<ORCFileWriter>(new ORCFileWriter());
Status status = result->impl_->Open(output_stream, writer_options);
Expand Down
26 changes: 26 additions & 0 deletions cpp/src/arrow/adapters/orc/adapter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@
#include "arrow/status.h"
#include "arrow/table.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/matchers.h"
#include "arrow/testing/random.h"
#include "arrow/type.h"
#include "arrow/util/io_util.h"
#include "arrow/util/key_value_metadata.h"

namespace liborc = orc;
Expand Down Expand Up @@ -636,6 +638,30 @@ TEST(TestAdapterReadWrite, FieldAttributesRoundTrip) {
AssertSchemaEqual(schema, read_schema, /*check_metadata=*/true);
}

// Verifies that opening an ORC writer or reader fails with StatusCode::Invalid when
// TZDIR points to a directory that does not exist.
TEST(TestAdapterReadWrite, ThrowWhenTZDBUnavaiable) {
  // Back up the original TZDIR env and set a wrong value on purpose to trigger the
  // check.
  const char* tzdir_env_key = "TZDIR";
  const char* expect_str = "IANA timezone database is unavailable but required by ORC";

  // Copy the value into an owned string before touching the environment: the pointer
  // returned by std::getenv may be invalidated once the environment is modified.
  const char* tzdir_env_original = std::getenv(tzdir_env_key);
  const bool had_tzdir_env = tzdir_env_original != nullptr;
  const std::string tzdir_env_backup = had_tzdir_env ? tzdir_env_original : "";

  ARROW_EXPECT_OK(arrow::internal::SetEnvVar(tzdir_env_key, "/a/b/c/d/e"));

  EXPECT_OK_AND_ASSIGN(auto out_stream, io::BufferOutputStream::Create(1024));
  EXPECT_THAT(
      adapters::orc::ORCFileWriter::Open(out_stream.get(), adapters::orc::WriteOptions()),
      Raises(StatusCode::Invalid, testing::HasSubstr(expect_str)));

  EXPECT_OK_AND_ASSIGN(auto buffer, out_stream->Finish());
  EXPECT_THAT(adapters::orc::ORCFileReader::Open(
                  std::make_shared<io::BufferReader>(buffer), default_memory_pool()),
              Raises(StatusCode::Invalid, testing::HasSubstr(expect_str)));

  // Restore TZDIR env: clear the bogus value, then put back the saved one if any.
  ARROW_EXPECT_OK(arrow::internal::DelEnvVar(tzdir_env_key));
  if (had_tzdir_env) {
    ARROW_EXPECT_OK(
        arrow::internal::SetEnvVar(tzdir_env_key, tzdir_env_backup.c_str()));
  }
}

// Trivial

class TestORCWriterTrivialNoWrite : public ::testing::Test {};
Expand Down

0 comments on commit 0c640ac

Please sign in to comment.