-
Notifications
You must be signed in to change notification settings - Fork 743
Fix prefixed vector index with PK columns (#18196) #18889
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,51 +45,22 @@ void AddRowToLevel(TBufferData& buffer, TClusterId parent, TClusterId child, con | |
| buffer.AddRow(TSerializedCellVec{pk}, TSerializedCellVec::Serialize(data)); | ||
| } | ||
|
|
||
| void AddRowMainToBuild(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row) { | ||
| EnsureNoPostingParentFlag(parent); | ||
|
|
||
| std::array<TCell, 1> cells; | ||
| cells[0] = TCell::Make(parent); | ||
| auto pk = TSerializedCellVec::Serialize(cells); | ||
| TSerializedCellVec::UnsafeAppendCells(key, pk); | ||
| buffer.AddRow(TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(row), | ||
| TSerializedCellVec{key}); | ||
| } | ||
|
|
||
| void AddRowMainToPosting(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 dataPos) | ||
| { | ||
| parent = SetPostingParentFlag(parent); | ||
|
|
||
| std::array<TCell, 1> cells; | ||
| cells[0] = TCell::Make(parent); | ||
| auto pk = TSerializedCellVec::Serialize(cells); | ||
| TSerializedCellVec::UnsafeAppendCells(key, pk); | ||
| buffer.AddRow(TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(row.Slice(dataPos)), | ||
| TSerializedCellVec{key}); | ||
| } | ||
|
|
||
| void AddRowBuildToBuild(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 prefixColumns) | ||
| { | ||
| EnsureNoPostingParentFlag(parent); | ||
| void AddRowToData(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> sourcePk, | ||
| TArrayRef<const TCell> dataColumns, TArrayRef<const TCell> origKey, bool isPostingLevel) { | ||
| if (isPostingLevel) { | ||
| parent = SetPostingParentFlag(parent); | ||
| } else { | ||
| EnsureNoPostingParentFlag(parent); | ||
| } | ||
|
|
||
| std::array<TCell, 1> cells; | ||
| cells[0] = TCell::Make(parent); | ||
| auto pk = TSerializedCellVec::Serialize(cells); | ||
| TSerializedCellVec::UnsafeAppendCells(key.Slice(prefixColumns), pk); | ||
| buffer.AddRow(TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(row), | ||
| TSerializedCellVec{key}); | ||
| } | ||
|
|
||
| void AddRowBuildToPosting(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 dataPos, ui32 prefixColumns) | ||
| { | ||
| parent = SetPostingParentFlag(parent); | ||
| TSerializedCellVec::UnsafeAppendCells(sourcePk, pk); | ||
|
|
||
| std::array<TCell, 1> cells; | ||
| cells[0] = TCell::Make(parent); | ||
| auto pk = TSerializedCellVec::Serialize(cells); | ||
| TSerializedCellVec::UnsafeAppendCells(key.Slice(prefixColumns), pk); | ||
| buffer.AddRow(TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(row.Slice(dataPos)), | ||
| TSerializedCellVec{key}); | ||
| buffer.AddRow(TSerializedCellVec{std::move(pk)}, | ||
| TSerializedCellVec::Serialize(dataColumns), | ||
| TSerializedCellVec{origKey}); | ||
| } | ||
|
|
||
| TTags MakeScanTags(const TUserTable& table, const TProtoStringType& embedding, | ||
|
|
@@ -114,12 +85,11 @@ TTags MakeScanTags(const TUserTable& table, const TProtoStringType& embedding, | |
|
|
||
| std::shared_ptr<NTxProxy::TUploadTypes> MakeOutputTypes(const TUserTable& table, NKikimrTxDataShard::EKMeansState uploadState, | ||
| const TProtoStringType& embedding, const google::protobuf::RepeatedPtrField<TProtoStringType>& data, | ||
| ui32 prefixColumns) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. тут наверно тоже два метода бы, но не уверен, посмотри сам как они логически вызываются и рядом со всеми методами нарисуй плиз схему строки
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. да вот тут мне кажется можно и оставить, тут относительно простая логика - ну типа если даны явно колонки первичного ключа то берём их из pkColumns... |
||
| const google::protobuf::RepeatedPtrField<TProtoStringType>& pkColumns) | ||
| { | ||
| auto types = GetAllTypes(table); | ||
|
|
||
| auto result = std::make_shared<NTxProxy::TUploadTypes>(); | ||
| result->reserve(1 + 1 + std::min((table.KeyColumnTypes.size() - prefixColumns) + data.size(), types.size())); | ||
|
|
||
| Ydb::Type type; | ||
| type.set_type_id(NTableIndex::ClusterIdType); | ||
|
|
@@ -133,8 +103,14 @@ std::shared_ptr<NTxProxy::TUploadTypes> MakeOutputTypes(const TUserTable& table, | |
| types.erase(it); | ||
| } | ||
| }; | ||
| for (const auto& column : table.KeyColumnIds | std::views::drop(prefixColumns)) { | ||
| addType(table.Columns.at(column).Name); | ||
| if (pkColumns.size()) { | ||
| for (const auto& column : pkColumns) { | ||
| addType(column); | ||
| } | ||
| } else { | ||
| for (const auto& column : table.KeyColumnIds) { | ||
| addType(table.Columns.at(column).Name); | ||
| } | ||
| } | ||
| switch (uploadState) { | ||
| case NKikimrTxDataShard::EKMeansState::UPLOAD_MAIN_TO_BUILD: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
тест что ты добавил окей пусть будет, но может ещё простеньких тестов сделать несколько штук как в тикете?
#18196
то есть CREATE TABLE + индекс и проверить содержимое всех трех таблиц и их схему
(и не для префиксного тоже бы)
без сплитов, просто базовую функциональность
их раньше ещё читать нельзя было, а теперь можно
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
короче говоря посмотрел, вроде дело хорошее, но выглядит как будто надо это отдельно делать, тут-то именно багфикс. а остальные кейсы, где не префиксный и т.п. - вроде и так другими тестами проверяются