Skip to content

Commit

Permalink
add RowGroupPositions
Browse files Browse the repository at this point in the history
  • Loading branch information
luffy-zh committed Sep 20, 2024
1 parent 7a31082 commit f1b8e21
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 13 deletions.
9 changes: 7 additions & 2 deletions c++/include/orc/Reader.hh
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ namespace orc {
};
ReaderMetrics* getDefaultReaderMetrics();

/**
 * Row index positions reported for a single column.
 */
struct RowGroupPositions {
  // Id of the column the positions belong to. Zero-initialized so that a
  // default-constructed value never carries an indeterminate id.
  uint64_t columnId = 0;
  // Position values reported for that column; empty until filled in.
  std::vector<int32_t> positions;
};

/**
* Options for creating a Reader.
*/
Expand Down Expand Up @@ -660,9 +665,9 @@ namespace orc {

/**
* Get the row group positions of the specified column in the current stripe.
* @return the position entries for the specified column.
* @return the position entries for the specified columns.
*/
virtual std::vector<std::vector<int>> getCurrentStripePositionEntries(uint64_t columnId) = 0;
virtual std::vector<RowGroupPositions> getPositionEntries(int columnId) = 0;
};
} // namespace orc

Expand Down
10 changes: 6 additions & 4 deletions c++/src/Reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1241,16 +1241,18 @@ namespace orc {
useTightNumericVector_);
}

std::vector<std::vector<int>> RowReaderImpl::getCurrentStripePositionEntries(uint64_t columnId) {
std::vector<RowGroupPositions> RowReaderImpl::getPositionEntries(int columnId) {
loadStripeIndex();
std::vector<std::vector<int>> result;
std::vector<RowGroupPositions> result;
auto rowIndex = rowIndexes_[columnId];
RowGroupPositions rgPositions;
rgPositions.columnId = columnId;
for (auto rowIndexEntry : rowIndex.entry()) {
std::vector<int> posVector;
auto posVector = rgPositions.positions;
for (auto position : rowIndexEntry.positions()) {
posVector.push_back(position);
}
result.push_back(posVector);
result.push_back(rgPositions);
}

return result;
Expand Down
2 changes: 1 addition & 1 deletion c++/src/Reader.hh
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ namespace orc {
return &schemaEvolution_;
}

std::vector<std::vector<int>> getCurrentStripePositionEntries(uint64_t columnId) override;
std::vector<RowGroupPositions> getPositionEntries(int columnId) override;
};

class ReaderImpl : public Reader {
Expand Down
12 changes: 6 additions & 6 deletions c++/test/TestWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ namespace orc {
void verifyCompressionBlockAlignment(const std::unique_ptr<RowReader>& reader,
uint64_t columnCount) {
for (uint64_t i = 0; i < columnCount; ++i) {
auto posEntries = reader->getCurrentStripePositionEntries(i + 1);
auto posEntries = reader->getPositionEntries(i);
auto subType = reader->getSelectedType().getSubtype(i);
for (auto posVector : posEntries) {
for (uint64_t posIndex = 0; posIndex < posVector.size(); ++posIndex) {
for (auto rowGroupPositions : posEntries) {
for (uint64_t posIndex = 0; posIndex < rowGroupPositions.positions.size(); ++posIndex) {
// After we call finishStream(), unusedBufferSize is set to 0,
// so only the first position is valid in each recordPosition call.
switch (subType->getKind()) {
Expand All @@ -102,20 +102,20 @@ namespace orc {
case CHAR:
case VARCHAR: {
if (posIndex != 0 && posIndex != 2) {
EXPECT_EQ(posVector[posIndex], 0);
EXPECT_EQ(rowGroupPositions.positions[posIndex], 0);
}
break;
}
case TIMESTAMP_INSTANT:
case TIMESTAMP: {
if (posIndex != 0 && posIndex != 3) {
EXPECT_EQ(posVector[posIndex], 0);
EXPECT_EQ(rowGroupPositions.positions[posIndex], 0);
}
break;
}
default: {
if (posIndex != 0) {
EXPECT_EQ(posVector[posIndex], 0);
EXPECT_EQ(rowGroupPositions.positions[posIndex], 0);
}
break;
}
Expand Down

0 comments on commit f1b8e21

Please sign in to comment.