Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix makeScanSpec wrong index issue if skip rowindex column #9866

5 changes: 3 additions & 2 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ std::shared_ptr<common::ScanSpec> makeScanSpec(
}
}

int numChildren = 0;
// Process columns that will be projected out.
for (int i = 0; i < rowType->size(); ++i) {
auto& name = rowType->nameOf(i);
Expand All @@ -373,7 +374,7 @@ std::shared_ptr<common::ScanSpec> makeScanSpec(
}
auto it = outputSubfields.find(name);
if (it == outputSubfields.end()) {
auto* fieldSpec = spec->addFieldRecursively(name, *type, i);
auto* fieldSpec = spec->addFieldRecursively(name, *type, numChildren++);
filterOutNullMapKeys(*type, *fieldSpec);
filterSubfields.erase(name);
continue;
Expand All @@ -388,7 +389,7 @@ std::shared_ptr<common::ScanSpec> makeScanSpec(
}
filterSubfields.erase(it);
}
auto* fieldSpec = spec->addField(name, i);
auto* fieldSpec = spec->addField(name, numChildren++);
addSubfields(*type, subfieldSpecs, 1, pool, *fieldSpec);
filterOutNullMapKeys(*type, *fieldSpec);
subfieldSpecs.clear();
Expand Down
14 changes: 5 additions & 9 deletions velox/connectors/hive/SplitReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,15 +310,11 @@ void SplitReader::setRowIndexColumn(
auto rowIndexColumnName = rowIndexColumn->name();
auto rowIndexMetaColIdx =
readerOutputType_->getChildIdxIfExists(rowIndexColumnName);
if (rowIndexMetaColIdx.has_value() &&
!fileType->containsChild(rowIndexColumnName) &&
gaoyangxiaozhu marked this conversation as resolved.
Show resolved Hide resolved
hiveSplit_->partitionKeys.find(rowIndexColumnName) ==
hiveSplit_->partitionKeys.end()) {
dwio::common::RowNumberColumnInfo rowNumberColumnInfo;
rowNumberColumnInfo.insertPosition = rowIndexMetaColIdx.value();
rowNumberColumnInfo.name = rowIndexColumnName;
baseRowReaderOpts_.setRowNumberColumnInfo(std::move(rowNumberColumnInfo));
}
dwio::common::RowNumberColumnInfo rowNumberColumnInfo;
VELOX_CHECK(rowIndexMetaColIdx.has_value());
rowNumberColumnInfo.insertPosition = rowIndexMetaColIdx.value();
gaoyangxiaozhu marked this conversation as resolved.
Show resolved Hide resolved
rowNumberColumnInfo.name = rowIndexColumnName;
baseRowReaderOpts_.setRowNumberColumnInfo(std::move(rowNumberColumnInfo));
}

std::vector<TypePtr> SplitReader::adaptColumns(
Expand Down
4 changes: 4 additions & 0 deletions velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,10 @@ TEST_F(ParquetTableScanTest, rowIndex) {
{"a", "_tmp_metadata_row_index"},
assignments,
"SELECT a, _tmp_metadata_row_index FROM tmp");
assertSelectWithAssignments(
{"_tmp_metadata_row_index", "a"},
assignments,
"SELECT _tmp_metadata_row_index, a FROM tmp");
// case 2: the file has a `_tmp_metadata_row_index` column, so use the user
// data instead of generating it.
loadData(
Expand Down
Loading