Skip to content

Commit

Permalink
GH-40431: [C++] Move key_hash/key_map/light_array related files to in…
Browse files Browse the repository at this point in the history
…ternal for prevent using by users (#40484)

### Rationale for this change

These files expose implementation details and APIs that are not meant for third-party use. This PR explicitly marks them internal, which also avoids having them installed.

### Are these changes tested?

By existing builds and tests.

### Are there any user-facing changes?

No, except hiding some header files that were not supposed to be included externally.

* GitHub Issue: #40431

Lead-authored-by: ZhangHuiGui <hugo.zhang@openpie.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
ZhangHuiGui and pitrou authored Apr 2, 2024
1 parent 5ddef63 commit 8163d02
Show file tree
Hide file tree
Showing 24 changed files with 44 additions and 39 deletions.
10 changes: 5 additions & 5 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -689,9 +689,9 @@ set(ARROW_COMPUTE_SRCS
compute/function.cc
compute/function_internal.cc
compute/kernel.cc
compute/key_hash.cc
compute/key_map.cc
compute/light_array.cc
compute/key_hash_internal.cc
compute/key_map_internal.cc
compute/light_array_internal.cc
compute/ordering.cc
compute/registry.cc
compute/kernels/codegen_internal.cc
Expand All @@ -717,8 +717,8 @@ set(ARROW_COMPUTE_SRCS
compute/row/row_internal.cc
compute/util.cc)

append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc)
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/acero/asof_join_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
#include "arrow/compute/function_internal.h"
#endif
#include "arrow/acero/time_series_util.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/status.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/bloom_filter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include "arrow/acero/task_util.h"
#include "arrow/acero/test_util_internal.h"
#include "arrow/acero/util.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/config.h"
#include "arrow/util/cpu_info.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/hash_join_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include "arrow/acero/options.h"
#include "arrow/acero/schema_util.h"
#include "arrow/acero/util.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/future.h"
#include "arrow/util/thread_pool.h"
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/acero/hash_join_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#pragma once

#include <cassert>
#include <vector>

#include "arrow/acero/options.h"
Expand Down Expand Up @@ -88,7 +89,7 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
const Expression& filter);

bool PayloadIsEmpty(int side) {
ARROW_DCHECK(side == 0 || side == 1);
assert(side == 0 || side == 1);
return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
}

Expand Down
14 changes: 7 additions & 7 deletions cpp/src/arrow/acero/schema_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@

#pragma once

#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "arrow/compute/light_array.h" // for KeyColumnMetadata
#include "arrow/type.h" // for DataType, FieldRef, Field and Schema
#include "arrow/type.h" // for DataType, FieldRef, Field and Schema

namespace arrow {

Expand All @@ -47,8 +47,8 @@ struct SchemaProjectionMap {
const int* source_to_base;
const int* base_to_target;
inline int get(int i) const {
ARROW_DCHECK(i >= 0 && i < num_cols);
ARROW_DCHECK(source_to_base[i] != kMissingField);
assert(i >= 0 && i < num_cols);
assert(source_to_base[i] != kMissingField);
return base_to_target[source_to_base[i]];
}
};
Expand All @@ -66,7 +66,7 @@ class SchemaProjectionMaps {
Status Init(ProjectionIdEnum full_schema_handle, const Schema& schema,
const std::vector<ProjectionIdEnum>& projection_handles,
const std::vector<const std::vector<FieldRef>*>& projections) {
ARROW_DCHECK(projection_handles.size() == projections.size());
assert(projection_handles.size() == projections.size());
ARROW_RETURN_NOT_OK(RegisterSchema(full_schema_handle, schema));
for (size_t i = 0; i < projections.size(); ++i) {
ARROW_RETURN_NOT_OK(
Expand Down Expand Up @@ -174,7 +174,7 @@ class SchemaProjectionMaps {
}
}
// We should never get here
ARROW_DCHECK(false);
assert(false);
return -1;
}

Expand Down Expand Up @@ -207,7 +207,7 @@ class SchemaProjectionMaps {
break;
}
}
ARROW_DCHECK(field_id != SchemaProjectionMap::kMissingField);
assert(field_id != SchemaProjectionMap::kMissingField);
mapping[i] = field_id;
inverse_mapping[field_id] = i;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/swiss_join.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include "arrow/acero/util.h"
#include "arrow/array/util.h" // MakeArrayFromScalar
#include "arrow/compute/kernels/row_encoder_internal.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/compute/row/compare_internal.h"
#include "arrow/compute/row/encode_internal.h"
#include "arrow/util/bit_util.h"
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/acero/swiss_join_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
#include "arrow/acero/schema_util.h"
#include "arrow/acero/task_util.h"
#include "arrow/compute/kernels/row_encoder_internal.h"
#include "arrow/compute/key_map.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/key_map_internal.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/row/encode_internal.h"

namespace arrow {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"

#include <memory.h>

#include <algorithm>
#include <cstdint>

#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/ubsan.h"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

#include <cstdint>

#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/util.h"

namespace arrow {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include <immintrin.h>

#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/util/bit_util.h"

namespace arrow {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/key_hash_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include <unordered_set>

#include "arrow/array/builder_binary.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"
#include "arrow/util/cpu_info.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/compute/key_map.h"
#include "arrow/compute/key_map_internal.h"

#include <algorithm>
#include <cstdint>
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include <immintrin.h>

#include "arrow/compute/key_map.h"
#include "arrow/compute/key_map_internal.h"
#include "arrow/util/logging.h"

namespace arrow {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"

#include <type_traits>

Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/light_array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"

#include <gtest/gtest.h>
#include <numeric>
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/row/compare_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include <cstdint>

#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/row/encode_internal.h"
#include "arrow/compute/row/row_internal.h"
#include "arrow/compute/util.h"
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/row/encode_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
#include <vector>

#include "arrow/array/data.h"
#include "arrow/compute/key_map.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/key_map_internal.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/row/row_internal.h"
#include "arrow/compute/util.h"
#include "arrow/memory_pool.h"
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/row/grouper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#include "arrow/compute/api_vector.h"
#include "arrow/compute/function.h"
#include "arrow/compute/kernels/row_encoder_internal.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/registry.h"
#include "arrow/compute/row/compare_internal.h"
#include "arrow/compute/row/grouper_internal.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/row/row_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include <vector>

#include "arrow/buffer.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/logging.h"
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ using internal::CpuInfo;
namespace util {

void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {
int64_t new_top = top_ + PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
int64_t new_top = top_ + EstimatedAllocationSize(num_bytes);
// Stack overflow check (see GH-39582).
// XXX cannot return a regular Status because most consumers do not either.
ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow";
Expand All @@ -48,7 +48,7 @@ void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {

void TempVectorStack::release(int id, uint32_t num_bytes) {
ARROW_DCHECK(num_vectors_ == id + 1);
int64_t size = PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
int64_t size = EstimatedAllocationSize(num_bytes);
ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[-1] ==
kGuard2);
ARROW_DCHECK(top_ >= size);
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/arrow/compute/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class ARROW_EXPORT TempVectorStack {
Status Init(MemoryPool* pool, int64_t size) {
num_vectors_ = 0;
top_ = 0;
buffer_size_ = PaddedAllocationSize(size) + kPadding + 2 * sizeof(uint64_t);
buffer_size_ = EstimatedAllocationSize(size);
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
// Ensure later operations don't accidentally read uninitialized memory.
std::memset(buffer->mutable_data(), 0xFF, size);
Expand All @@ -98,7 +98,11 @@ class ARROW_EXPORT TempVectorStack {
}

private:
int64_t PaddedAllocationSize(int64_t num_bytes) {
static int64_t EstimatedAllocationSize(int64_t size) {
return PaddedAllocationSize(size) + 2 * sizeof(uint64_t);
}

static int64_t PaddedAllocationSize(int64_t num_bytes) {
// Round up allocation size to multiple of 8 bytes
// to avoid returning temp vectors with unaligned address.
//
Expand Down

0 comments on commit 8163d02

Please sign in to comment.