Skip to content

Commit

Permalink
fix comment
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Nov 15, 2024
1 parent 23fb3fa commit d892819
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 9 deletions.
2 changes: 1 addition & 1 deletion cpp/src/parquet/bloom_filter_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#pragma once

#include "arrow/io/type_fwd.h"
#include "arrow/io/interfaces.h"
#include "parquet/properties.h"
#include "parquet/type_fwd.h"

Expand Down
8 changes: 5 additions & 3 deletions cpp/src/parquet/column_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1701,6 +1701,7 @@ Status TypedColumnWriterImpl<DType>::WriteArrowDictionary(

auto update_stats = [&](int64_t num_chunk_levels,
const std::shared_ptr<Array>& chunk_indices) {
DCHECK(page_statistics_ != nullptr || bloom_filter_ != nullptr);
// TODO(PARQUET-2068) This approach may make two copies. First, a copy of the
// indices array to a (hopefully smaller) referenced indices array. Second, a copy
// of the values array to a (probably not smaller) referenced values array.
Expand All @@ -1725,9 +1726,8 @@ Status TypedColumnWriterImpl<DType>::WriteArrowDictionary(
&exec_ctx));
referenced_dictionary = referenced_dictionary_datum.make_array();
}

int64_t non_null_count = chunk_indices->length() - chunk_indices->null_count();
if (page_statistics_ != nullptr) {
int64_t non_null_count = chunk_indices->length() - chunk_indices->null_count();
page_statistics_->IncrementNullCount(num_chunk_levels - non_null_count);
page_statistics_->IncrementNumValues(non_null_count);
page_statistics_->Update(*referenced_dictionary, /*update_counts=*/false);
Expand Down Expand Up @@ -2426,7 +2426,9 @@ void TypedColumnWriterImpl<FLBAType>::UpdateBloomFilter(const FLBA* values,

// Specialization of UpdateBloomFilter for BooleanType. Both parameters are
// unnamed and ignored, so no values are ever inserted into a filter here; the
// body only rejects a writer whose bloom_filter_ is non-null.
// NOTE(review): this view shows both a DCHECK and an explicit throw for the
// same condition; in a diff rendering one of them is presumably the removed
// line -- confirm against the actual file before relying on either.
template <>
void TypedColumnWriterImpl<BooleanType>::UpdateBloomFilter(const bool*, int64_t) {
DCHECK(bloom_filter_ == nullptr);
// Surface the misconfiguration as a ParquetException rather than ignoring it.
if (ARROW_PREDICT_FALSE(bloom_filter_ != nullptr)) {
throw ParquetException("BooleanType does not support bloom filters");
}
}

template <typename DType>
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/parquet/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -516,9 +516,10 @@ class PARQUET_EXPORT RowGroupMetaDataBuilder {
using RowGroupIndexLocation = std::vector<std::optional<IndexLocation>>;

/// Alias type of bloom filter location of a row group. The filter location
/// is located by column ordinal. Number of columns with a bloom filter to
/// be relatively small compared to the number of overall columns, so
/// map is used.
/// is located by column ordinal.
///
/// The number of columns with a bloom filter is expected to be relatively
/// small compared to the total number of columns, so a map is used.
using RowGroupBloomFilterLocation = std::map<int32_t, IndexLocation>;

/// Alias type of page index and location of a parquet file. The
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/parquet/page_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#pragma once

#include "arrow/io/type_fwd.h"
#include "arrow/io/interfaces.h"
#include "parquet/encryption/type_fwd.h"
#include "parquet/types.h"

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/parquet/properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ class PARQUET_EXPORT WriterProperties {
}

/// Disable bloom filter for the column specified by `path`.
/// Default enabled.
/// Default disabled.
Builder* disable_bloom_filter(const std::shared_ptr<schema::ColumnPath>& path) {
// Convenience form for callers holding a ColumnPath: forwards to
// disable_bloom_filter with the result of path->ToDotString() and returns
// whatever that call returns.
return this->disable_bloom_filter(path->ToDotString());
}
Expand Down

0 comments on commit d892819

Please sign in to comment.