Skip to content

Commit

Permalink
replace StringType's gernerate data by StringWithRepeats
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhangHuiGui committed Mar 14, 2024
1 parent 64ad14b commit 62fbbf6
Showing 1 changed file with 12 additions and 40 deletions.
52 changes: 12 additions & 40 deletions cpp/src/arrow/compute/kernels/vector_hash_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,32 +87,23 @@ struct HashBenchCase {
double null_probability;
};

template <typename Type>
template <typename ArrowType>
struct HashParams {
using T = typename Type::c_type;

using CType = typename TypeTraits<ArrowType>::CType;
HashBenchCase params;

void GenerateTestData(std::shared_ptr<Array>* arr) const {
std::vector<int64_t> draws;
std::vector<T> values;
std::vector<bool> is_valid;
randint<int64_t>(params.length, 0, params.num_unique, &draws);
for (int64_t draw : draws) {
values.push_back(static_cast<T>(draw));
}
if (params.null_probability > 0) {
random_is_valid(params.length, params.null_probability, &is_valid);
ArrayFromVector<Type, T>(is_valid, values, arr);
} else {
ArrayFromVector<Type, T>(values, arr);
}
random::RandomArrayGenerator rand(0);
auto min = static_cast<CType>(0);
auto max = static_cast<CType>(params.num_unique);

*arr = rand.Numeric<ArrowType>(params.length, min, max, params.null_probability);
}

void SetMetadata(benchmark::State& state) const {
state.counters["null_percent"] = params.null_probability * 100;
state.counters["num_unique"] = static_cast<double>(params.num_unique);
state.SetBytesProcessed(state.iterations() * params.length * sizeof(T));
state.SetBytesProcessed(state.iterations() * params.length * sizeof(CType));
state.SetItemsProcessed(state.iterations() * params.length);
}
};
Expand All @@ -122,29 +113,10 @@ struct HashParams<StringType> {
HashBenchCase params;
int32_t byte_width;
void GenerateTestData(std::shared_ptr<Array>* arr) const {
std::vector<int64_t> draws;
randint<int64_t>(params.length, 0, params.num_unique, &draws);

const int64_t total_bytes = this->byte_width * params.length;
std::vector<uint8_t> uniques(total_bytes);
const uint32_t seed = 0;
random_bytes(total_bytes, seed, uniques.data());

std::vector<bool> is_valid;
if (params.null_probability > 0) {
random_is_valid(params.length, params.null_probability, &is_valid);
}

StringBuilder builder;
for (int64_t i = 0; i < params.length; ++i) {
if (params.null_probability == 0 || is_valid[i]) {
ABORT_NOT_OK(builder.Append(uniques.data() + this->byte_width * draws[i],
this->byte_width));
} else {
ABORT_NOT_OK(builder.AppendNull());
}
}
ABORT_NOT_OK(builder.Finish(arr));
random::RandomArrayGenerator rnd(/*seed=*/0);
*arr = rnd.StringWithRepeats(
params.length, params.num_unique, /*min_length=*/this->byte_width,
/*max_length=*/this->byte_width, params.null_probability);
}

void SetMetadata(benchmark::State& state) const {
Expand Down

0 comments on commit 62fbbf6

Please sign in to comment.