diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp index 9cbbbce0b787..a605cd199165 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp @@ -22,40 +22,53 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo const TKqpOptimizeContext& kqpCtx) { auto inputNode = TExprBase(input); - double nRows = 0; - int nAttrs = 0; + std::shared_ptr inputStats; - const TExprNode* path; + int nAttrs = 0; + bool readRange = false; if (auto readTable = inputNode.Maybe()) { - path = readTable.Cast().Table().Path().Raw(); + inputStats = typeCtx->GetStats(readTable.Cast().Table().Raw()); nAttrs = readTable.Cast().Columns().Size(); + + auto range = readTable.Cast().Range(); + auto rangeFrom = range.From().Maybe(); + auto rangeTo = range.To().Maybe(); + if (rangeFrom && rangeTo) { + readRange = true; + } } else if (auto readRanges = inputNode.Maybe()) { - path = readRanges.Cast().Table().Path().Raw(); + inputStats = typeCtx->GetStats(readRanges.Cast().Table().Raw()); nAttrs = readRanges.Cast().Columns().Size(); } else { Y_ENSURE(false, "Invalid node type for InferStatisticsForReadTable"); } - const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, path->Content()); - int totalAttrs = tableData.Metadata->Columns.size(); - nRows = tableData.Metadata->RecordsCount; - - double byteSize = tableData.Metadata->DataSize * (nAttrs / (double)totalAttrs); - - auto keyColumns = TIntrusivePtr(new TOptimizerStatistics::TKeyColumns(tableData.Metadata->KeyColumnNames)); - auto stats = std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, keyColumns); - if (kqpCtx.Config->OverrideStatistics.Get()) { - stats = OverrideStatistics(*stats, path->Content(), *kqpCtx.Config->OverrideStatistics.Get()); + /** + * We need index statistics to calculate this in the future + * Right now we use very small estimates to make sure CBO picks Lookup Joins + * I.e. there can be a chain of lookup joins in OLTP scenario and we want to make + * sure the cardinality doesn't blow up and lookup joins are still being picked + */ + double inputRows = inputStats->Nrows; + double nRows = inputRows; + if (readRange) { + nRows = 1; } - if (stats->ColumnStatistics) { - for (const auto& [columnName, metaData]: tableData.Metadata->Columns) { - stats->ColumnStatistics->Data[columnName].Type = metaData.Type; - } - } + double sizePerRow = inputStats->ByteSize / (inputRows==0?1:inputRows); + double byteSize = nRows * sizePerRow * (nAttrs / (double)inputStats->Ncols); - YQL_CLOG(TRACE, CoreDq) << "Infer statistics for read table, nrows: " << stats->Nrows << ", nattrs: " << stats->Ncols; + auto stats = std::make_shared( + EStatisticsType::BaseTable, + nRows, + nAttrs, + byteSize, + 0.0, + inputStats->KeyColumns, + inputStats->ColumnStatistics); + + YQL_CLOG(TRACE, CoreDq) << "Infer statistics for read table, nrows: " << stats->Nrows << ", nattrs: " << stats->Ncols << ", byteSize: " << stats->ByteSize; typeCtx->SetStats(input.Get(), stats); } @@ -81,7 +94,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon stats = OverrideStatistics(*stats, path.Value(), *kqpCtx.Config->OverrideStatistics.Get()); } - YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << stats->Nrows << ", nattrs: " << stats->Ncols << ", nKeyColumns: " << stats->KeyColumns->Data.size(); + YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << stats->Nrows << ", nattrs: " << stats->Ncols << ", byteSize: " << stats->ByteSize << ", nKeyColumns: " << stats->KeyColumns->Data.size(); typeCtx->SetStats(input.Get(), stats); } @@ -103,7 +116,14 @@ void InferStatisticsForSteamLookup(const TExprNode::TPtr& input, TTypeAnnotation auto inputStats = typeCtx->GetStats(streamLookup.Table().Raw()); auto byteSize = inputStats->ByteSize * (nAttrs / (double) inputStats->Ncols); - typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, inputStats->Nrows, nAttrs, byteSize, 0, inputStats->KeyColumns)); + typeCtx->SetStats(input.Get(), std::make_shared( + EStatisticsType::BaseTable, + inputStats->Nrows, + nAttrs, + byteSize, + 0, + inputStats->KeyColumns, + inputStats->ColumnStatistics)); } /** @@ -134,7 +154,14 @@ void InferStatisticsForLookupTable(const TExprNode::TPtr& input, TTypeAnnotation byteSize = 10; } - typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0, inputStats->KeyColumns)); + typeCtx->SetStats(input.Get(), std::make_shared( + EStatisticsType::BaseTable, + nRows, + nAttrs, + byteSize, + 0, + inputStats->KeyColumns, + inputStats->ColumnStatistics)); } /** @@ -151,7 +178,8 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn return; } - double nRows = inputStats->Nrows; + double inputRows = inputStats->Nrows; + double nRows = inputRows; // Check if we have a range expression, in that case just assign a single row to this read // We don't currently check the size of an index lookup @@ -165,10 +193,19 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn } int nAttrs = sourceSettings.Columns().Size(); + + double sizePerRow = inputStats->ByteSize / (inputRows==0?1:inputRows); + double byteSize = nRows * sizePerRow * (nAttrs / (double)inputStats->Ncols); double cost = inputStats->Cost; - double byteSize = inputStats->ByteSize * (nAttrs / (double)inputStats->Ncols); - typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, cost, inputStats->KeyColumns)); + typeCtx->SetStats(input.Get(), std::make_shared( + EStatisticsType::BaseTable, + nRows, + nAttrs, + byteSize, + cost, + inputStats->KeyColumns, + inputStats->ColumnStatistics)); } /** @@ -199,7 +236,8 @@ void InferStatisticsForReadTableIndexRanges(const TExprNode::TPtr& input, TTypeA inputStats->Ncols, inputStats->ByteSize, inputStats->Cost, - indexColumnsPtr); + indexColumnsPtr, + inputStats->ColumnStatistics); typeCtx->SetStats(input.Get(), stats); diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcc.json b/ydb/core/kqp/ut/join/data/join_order/tpcc.json new file mode 100644 index 000000000000..f02ba4c3350a --- /dev/null +++ b/ydb/core/kqp/ut/join/data/join_order/tpcc.json @@ -0,0 +1,13 @@ +{ + "op_name": "InnerJoin (Map)", + "args": [ + { + "op_name": "TableLookup", + "table": "stock" + }, + { + "op_name": "TableRangeScan", + "table": "order_line" + } + ] +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json index 56e74720fbcd..98b6b74da8d1 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json @@ -1,49 +1,49 @@ { - "op_name": "InnerJoin (Grace)", + "op_name": "InnerJoin (MapJoin)", "args": [ - { - "op_name": "InnerJoin (Grace)", - "args": [ - { - "op_name": "TableFullScan", - "table": "orders" - }, - { + { "op_name": "InnerJoin (Grace)", "args": [ - { - "op_name": "TableFullScan", - "table": "lineitem" - }, - { - "op_name": "InnerJoin (Grace)", - "args": [ - { + { "op_name": "TableFullScan", - "table": "partsupp" - }, - { + "table": "orders" + }, + { + "op_name": "InnerJoin (Grace)", + "args": [ + { + "op_name": "TableFullScan", + "table": "lineitem" + }, + { + "op_name": "InnerJoin (MapJoin)", + "args": [ + { + "op_name": "TableFullScan", + "table": "partsupp" + }, + { + "op_name": "TableFullScan", + "table": "part" + } + ] + } + ] + } + ] + }, + { + "op_name": "InnerJoin (MapJoin)", + "args": [ + { + "op_name": "TableFullScan", + "table": "supplier" + }, + { "op_name": "TableFullScan", - "table": "part" - } - ] - } + "table": "nation" + } ] - } - ] - }, - { - "op_name": "InnerJoin (MapJoin)", - "args": [ - { - "op_name": "TableFullScan", - "table": "supplier" - }, - { - "op_name": "TableFullScan", - "table": "nation" - } - ] - } + } ] -} +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/queries/tpcc.sql b/ydb/core/kqp/ut/join/data/queries/tpcc.sql new file mode 100644 index 000000000000..38daad0a176e --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/tpcc.sql @@ -0,0 +1,8 @@ +SELECT COUNT(DISTINCT (s.S_I_ID)) AS STOCK_COUNT +FROM `/Root/test/tpcc/order_line` as ol INNER JOIN `/Root/test/tpcc/stock` as s ON s.S_I_ID = ol.OL_I_ID +WHERE ol.OL_W_ID = 1 +AND ol.OL_D_ID = 10 +AND ol.OL_O_ID < 3000 +AND ol.OL_O_ID >= 2900 +AND s.S_W_ID = 1 +AND s.S_QUANTITY < 15 diff --git a/ydb/core/kqp/ut/join/data/schema/tpcc.sql b/ydb/core/kqp/ut/join/data/schema/tpcc.sql new file mode 100644 index 000000000000..6a4f220dc976 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/schema/tpcc.sql @@ -0,0 +1,133 @@ +CREATE TABLE`/Root/test/tpcc/warehouse` ( + W_ID Int32 NOT NULL, + W_YTD Double, + W_TAX Double, + W_NAME Utf8, + W_STREET_1 Utf8, + W_STREET_2 Utf8, + W_CITY Utf8, + W_STATE Utf8, + W_ZIP Utf8, + PRIMARY KEY (W_ID) + ); + +CREATE TABLE `/Root/test/tpcc/item` ( + I_ID Int32 NOT NULL, + I_NAME Utf8, + I_PRICE Double, + I_DATA Utf8, + I_IM_ID Int32, + PRIMARY KEY (I_ID) + ); + +CREATE TABLE `/Root/test/tpcc/stock` ( + S_W_ID Int32 NOT NULL, + S_I_ID Int32 NOT NULL, + S_QUANTITY Int32, + S_YTD Double, + S_ORDER_CNT Int32, + S_REMOTE_CNT Int32, + S_DATA Utf8, + S_DIST_01 Utf8, + S_DIST_02 Utf8, + S_DIST_03 Utf8, + S_DIST_04 Utf8, + S_DIST_05 Utf8, + S_DIST_06 Utf8, + S_DIST_07 Utf8, + S_DIST_08 Utf8, + S_DIST_09 Utf8, + S_DIST_10 Utf8, + PRIMARY KEY (S_W_ID, S_I_ID) + ); + +CREATE TABLE `/Root/test/tpcc/district` ( + D_W_ID Int32 NOT NULL, + D_ID Int32 NOT NULL, + D_YTD Double, + D_TAX Double, + D_NEXT_O_ID Int32, + D_NAME Utf8, + D_STREET_1 Utf8, + D_STREET_2 Utf8, + D_CITY Utf8, + D_STATE Utf8, + D_ZIP Utf8, + PRIMARY KEY (D_W_ID, D_ID) + ); + +CREATE TABLE `/Root/test/tpcc/customer` ( + C_W_ID Int32 NOT NULL, + C_D_ID Int32 NOT NULL, + C_ID Int32 NOT NULL, + C_DISCOUNT Double, + C_CREDIT Utf8, + C_LAST Utf8, + C_FIRST Utf8, + C_CREDIT_LIM Double, + C_BALANCE Double, + C_YTD_PAYMENT Double, + C_PAYMENT_CNT Int32, + C_DELIVERY_CNT Int32, + C_STREET_1 Utf8, + C_STREET_2 Utf8, + C_CITY Utf8, + C_STATE Utf8, + C_ZIP Utf8, + C_PHONE Utf8, + C_SINCE Timestamp, + C_MIDDLE Utf8, + C_DATA Utf8, + + PRIMARY KEY (C_W_ID, C_D_ID, C_ID) + ); + +CREATE TABLE `/Root/test/tpcc/history` ( + H_C_W_ID Int32, + H_C_ID Int32, + H_C_D_ID Int32, + H_D_ID Int32, + H_W_ID Int32, + H_DATE Timestamp, + H_AMOUNT Double, + H_DATA Utf8, + H_C_NANO_TS Int64 NOT NULL, + + PRIMARY KEY (H_C_W_ID, H_C_NANO_TS) + ); + +CREATE TABLE `/Root/test/tpcc/oorder` ( + O_W_ID Int32 NOT NULL, + O_D_ID Int32 NOT NULL, + O_ID Int32 NOT NULL, + O_C_ID Int32, + O_CARRIER_ID Int32, + O_OL_CNT Int32, + O_ALL_LOCAL Int32, + O_ENTRY_D Timestamp, + + PRIMARY KEY (O_W_ID, O_D_ID, O_ID) + ); + + CREATE TABLE `/Root/test/tpcc/new_order` ( + NO_W_ID Int32 NOT NULL, + NO_D_ID Int32 NOT NULL, + NO_O_ID Int32 NOT NULL, + + PRIMARY KEY (NO_W_ID, NO_D_ID, NO_O_ID) + ); + +CREATE TABLE `/Root/test/tpcc/order_line` ( + OL_W_ID Int32 NOT NULL, + OL_D_ID Int32 NOT NULL, + OL_O_ID Int32 NOT NULL, + OL_NUMBER Int32 NOT NULL, + OL_I_ID Int32, + OL_DELIVERY_D Timestamp, + OL_AMOUNT Double, + OL_SUPPLY_W_ID Int32, + OL_QUANTITY Double, + OL_DIST_INFO Utf8, + + PRIMARY KEY (OL_W_ID, OL_D_ID, OL_O_ID, OL_NUMBER) + ); diff --git a/ydb/core/kqp/ut/join/data/stats/tpcc.json b/ydb/core/kqp/ut/join/data/stats/tpcc.json new file mode 100644 index 000000000000..65216bf8d3f0 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/stats/tpcc.json @@ -0,0 +1,1126 @@ +{ + "/Root/test/tpcc/customer": { + "byte_size": 364073640263, + "n_rows": 502385014, + "n_attrs": 21, + "key_columns": [ + "C_W_ID", + "C_D_ID", + "C_ID" + ], + "columns": [ + { + "name": "C_BALANCE", + "most_common_value": [ + { + "frequency": 446216512, + "value": -10, + "type": "Double" + } + ], + "hyperloglog": 11726893 + }, + { + "name": "C_CITY", + "most_common_value": [ + { + "frequency": 1, + "value": "oqfvodrxknyozpvlho", + "type": "Utf8" + } + ], + "hyperloglog": 483844714 + }, + { + "name": "C_CREDIT", + "most_common_value": [ + { + "frequency": 431991877, + "value": "GC", + "type": "Utf8" + } + ], + "hyperloglog": 2 + }, + { + "name": "C_CREDIT_LIM", + "most_common_value": [ + { + "frequency": 480000000, + "value": 50000, + "type": "Double" + } + ], + "hyperloglog": 1 + }, + { + "name": "C_DATA", + "most_common_value": [ + { + "frequency": 1, + "value": "ycrhjuzbhdemdsiwsluqzicrbxphbdkovldvdpgvcpkivexexhncyumaqwqajquaowrbbbcfbhfiuubydkpugamrphgrimpsmiftsttnnpmsrowhrrrryohjwbuuejmqeqgysxuhkocmqyxakghxwcqzuwxkxpknrmtagpukwzvgfxoadevtxlewyealzcubstrritucaqrkyckwrwmnknlnxjtotmtoqpvqofqfulciwqwjckxdatsjceuzzieuqevzrdjmchwrjpfhovmwxwqoinhqqxkzqinidghnswrvlbaaedbgnhkqfvrtjtowfohbgylyisbeuicfdqdlrinbvwhocjkegffnzghtybqovkbjfropomanqvkjnthtszlldjsxszutqfnyqdgjhhgytiaof", + "type": "Utf8" + } + ], + "hyperloglog": 474174349 + }, + { + "name": "C_DELIVERY_CNT", + "most_common_value": [ + { + "frequency": 462671200, + "value": 0, + "type": "Int32" + } + ], + "hyperloglog": 2 + }, + { + "name": "C_DISCOUNT", + "most_common_value": [ + { + "frequency": 8885, + "value": 0.2019, + "type": "Double" + } + ], + "hyperloglog": 4976 + }, + { + "name": "C_D_ID", + "most_common_value": [ + { + "frequency": 48000000, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "C_FIRST", + "most_common_value": [ + { + "frequency": 1, + "value": "tmrpgxzvej", + "type": "Utf8" + } + ], + "hyperloglog": 474656250 + }, + { + "name": "C_ID", + "most_common_value": [ + { + "frequency": 160000, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 2992 + }, + { + "name": "C_LAST", + "most_common_value": [ + { + "frequency": 8361629, + "value": "ANTIANTIATION", + "type": "Utf8" + } + ], + "hyperloglog": 1000 + }, + { + "name": "C_MIDDLE", + "most_common_value": [ + { + "frequency": 480000000, + "value": "OE", + "type": "Utf8" + } + ], + "hyperloglog": 1 + }, + { + "name": "C_PAYMENT_CNT", + "most_common_value": [ + { + "frequency": 462927566, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 14 + }, + { + "name": "C_PHONE", + "most_common_value": [ + { + "frequency": 1, + "value": "0071372729356440", + "type": "Utf8" + } + ], + "hyperloglog": 477456883 + }, + { + "name": "C_SINCE", + "most_common_value": [ + { + "frequency": 1313, + "value": "2023-11-28T12:27:16.886000Z", + "type": "Timestamp" + } + ], + "hyperloglog": 1348820 + }, + { + "name": "C_STATE", + "most_common_value": [ + { + "frequency": 178037, + "value": "TR", + "type": "Utf8" + } + ], + "hyperloglog": 676 + }, + { + "name": "C_STREET_1", + "most_common_value": [ + { + "frequency": 1, + "value": "qdxmgxsnnneky", + "type": "Utf8" + } + ], + "hyperloglog": 475295227 + }, + { + "name": "C_STREET_2", + "most_common_value": [ + { + "frequency": 1, + "value": "msvtqyfximgibejeklk", + "type": "Utf8" + } + ], + "hyperloglog": 472307567 + }, + { + "name": "C_W_ID", + "most_common_value": [ + { + "frequency": 30000, + "value": 4301, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "C_YTD_PAYMENT", + "most_common_value": [ + { + "frequency": 462927566, + "value": 10, + "type": "Double" + } + ], + "hyperloglog": 1253170 + }, + { + "name": "C_ZIP", + "most_common_value": [ + { + "frequency": 3303, + "value": "139311111", + "type": "Utf8" + } + ], + "hyperloglog": 10087 + } + ] + }, + "/Root/test/tpcc/district": { + "byte_size": 30807162, + "n_rows": 160000, + "n_attrs": 11, + "key_columns": [ + "D_W_ID", + "D_ID" + ], + "columns": [ + { + "name": "D_CITY", + "most_common_value": [ + { + "frequency": 1, + "value": "ckhktweebpdmslxp", + "type": "Utf8" + } + ], + "hyperloglog": 160227 + }, + { + "name": "D_ID", + "most_common_value": [ + { + "frequency": 16000, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "D_NAME", + "most_common_value": [ + { + "frequency": 1, + "value": "dofbomzm", + "type": "Utf8" + } + ], + "hyperloglog": 159468 + }, + { + "name": "D_NEXT_O_ID", + "most_common_value": [ + { + "frequency": 4598, + "value": 3125, + "type": "Int32" + } + ], + "hyperloglog": 114 + }, + { + "name": "D_STATE", + "most_common_value": [ + { + "frequency": 95, + "value": "TK", + "type": "Utf8" + } + ], + "hyperloglog": 676 + }, + { + "name": "D_STREET_1", + "most_common_value": [ + { + "frequency": 1, + "value": "ipbwydczjge", + "type": "Utf8" + } + ], + "hyperloglog": 159930 + }, + { + "name": "D_STREET_2", + "most_common_value": [ + { + "frequency": 1, + "value": "jypxlaanveayjik", + "type": "Utf8" + } + ], + "hyperloglog": 160038 + }, + { + "name": "D_TAX", + "most_common_value": [ + { + "frequency": 27, + "value": 0.1475, + "type": "Double" + } + ], + "hyperloglog": 2001 + }, + { + "name": "D_W_ID", + "most_common_value": [ + { + "frequency": 10, + "value": 301, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "D_YTD", + "most_common_value": [ + { + "frequency": 1, + "value": 326749.86, + "type": "Double" + } + ], + "hyperloglog": 160208 + }, + { + "name": "D_ZIP", + "most_common_value": [ + { + "frequency": 160000, + "value": "123456789", + "type": "Utf8" + } + ], + "hyperloglog": 1 + } + ] + }, + "/Root/test/tpcc/history": { + "byte_size": 41478646661, + "n_rows": 499625922, + "n_attrs": 9, + "key_columns": [ + "H_C_NANO_TS" + ], + "columns": [ + { + "name": "H_AMOUNT", + "most_common_value": [ + { + "frequency": 480000040, + "value": 10, + "type": "Double" + } + ], + "hyperloglog": 501152 + }, + { + "name": "H_C_D_ID", + "most_common_value": [ + { + "frequency": 49965889, + "value": 2, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "H_C_ID", + "most_common_value": [ + { + "frequency": 213359, + "value": 75, + "type": "Int32" + } + ], + "hyperloglog": 2992 + }, + { + "name": "H_C_NANO_TS", + "most_common_value": [ + { + "frequency": 1, + "value": 3543847373496118, + "type": "Int64" + } + ], + "hyperloglog": 497158709 + }, + { + "name": "H_C_W_ID", + "most_common_value": [ + { + "frequency": 31756, + "value": 5278, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "H_DATA", + "most_common_value": [ + { + "frequency": 161, + "value": "lgxdat dblfxfb", + "type": "Utf8" + } + ], + "hyperloglog": 485105795 + }, + { + "name": "H_DATE", + "most_common_value": [ + { + "frequency": 7639, + "value": "2023-11-28T12:22:10.829000Z", + "type": "Timestamp" + } + ], + "hyperloglog": 7299624 + }, + { + "name": "H_D_ID", + "most_common_value": [ + { + "frequency": 49964655, + "value": 2, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "H_W_ID", + "most_common_value": [ + { + "frequency": 31259, + "value": 6050, + "type": "Int32" + } + ], + "hyperloglog": 15871 + } + ] + }, + "/Root/test/tpcc/item": { + "byte_size": 9762242, + "n_rows": 100000, + "n_attrs": 5, + "key_columns": [ + "I_ID" + ], + "columns": [ + { + "name": "I_DATA", + "most_common_value": [ + { + "frequency": 1, + "value": "myudbheisdtjypfsbaijhlnvzmvwa", + "type": "Utf8" + } + ], + "hyperloglog": 100201 + }, + { + "name": "I_ID", + "most_common_value": [ + { + "frequency": 1, + "value": 20625, + "type": "Int32" + } + ], + "hyperloglog": 98520 + }, + { + "name": "I_IM_ID", + "most_common_value": [ + { + "frequency": 11, + "value": 8262, + "type": "Int32" + } + ], + "hyperloglog": 9983 + }, + { + "name": "I_NAME", + "most_common_value": [ + { + "frequency": 1, + "value": "livzdcddkzovol", + "type": "Utf8" + } + ], + "hyperloglog": 102336 + }, + { + "name": "I_PRICE", + "most_common_value": [ + { + "frequency": 9, + "value": 51.89, + "type": "Double" + } + ], + "hyperloglog": 9831 + } + ] + }, + "/Root/test/tpcc/new_order": { + "byte_size": 3736621547, + "n_rows": 181109395, + "n_attrs": 3, + "key_columns": [ + "NO_W_ID", + "NO_D_ID", + "NO_O_ID" + ], + "columns": [ + { + "name": "NO_D_ID", + "most_common_value": [ + { + "frequency": 14661893, + "value": 10, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "NO_O_ID", + "most_common_value": [ + { + "frequency": 160000, + "value": 2285, + "type": "Int32" + } + ], + "hyperloglog": 1019 + }, + { + "name": "NO_W_ID", + "most_common_value": [ + { + "frequency": 9586, + "value": 8460, + "type": "Int32" + } + ], + "hyperloglog": 15871 + } + ] + }, + "/Root/test/tpcc/oorder": { + "byte_size": 38933878538, + "n_rows": 514367988, + "n_attrs": 8, + "key_columns": [ + "O_W_ID", + "O_D_ID", + "O_ID" + ], + "columns": [ + { + "name": "O_ALL_LOCAL", + "most_common_value": [ + { + "frequency": 498049256, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 2 + }, + { + "name": "O_CARRIER_ID", + "most_common_value": [ + { + "frequency": 126671200, + "value": 0, + "type": "Int32" + } + ], + "hyperloglog": 11 + }, + { + "name": "O_C_ID", + "most_common_value": [ + { + "frequency": 456301, + "value": 1283, + "type": "Int32" + } + ], + "hyperloglog": 2992 + }, + { + "name": "O_D_ID", + "most_common_value": [ + { + "frequency": 49994773, + "value": 10, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "O_ENTRY_D", + "most_common_value": [ + { + "frequency": 8186, + "value": "2023-11-28T12:22:40.484000Z", + "type": "Timestamp" + } + ], + "hyperloglog": 7973722 + }, + { + "name": "O_ID", + "most_common_value": [ + { + "frequency": 160000, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 3166 + }, + { + "name": "O_OL_CNT", + "most_common_value": [ + { + "frequency": 45519522, + "value": 7, + "type": "Int32" + } + ], + "hyperloglog": 11 + }, + { + "name": "O_W_ID", + "most_common_value": [ + { + "frequency": 31489, + "value": 8086, + "type": "Int32" + } + ], + "hyperloglog": 15871 + } + ] + }, + "/Root/test/tpcc/order_line": { + "byte_size": 479855221145, + "n_rows": 5053103449, + "n_attrs": 10, + "key_columns": [ + "OL_W_ID", + "OL_D_ID", + "OL_O_ID", + "OL_NUMBER" + ], + "columns": [ + { + "name": "OL_AMOUNT", + "most_common_value": [ + { + "frequency": 100804305, + "value": 0, + "type": "Double" + } + ], + "hyperloglog": 998151 + }, + { + "name": "OL_DELIVERY_D", + "most_common_value": [ + { + "frequency": 4529362528, + "value": "1970-01-01T00:00:00.000000Z", + "type": "Timestamp" + } + ], + "hyperloglog": 10386126 + }, + { + "name": "OL_DIST_INFO", + "most_common_value": [ + { + "frequency": 11, + "value": "iodfnhufrvksbxsgvolhfiv", + "type": "Utf8" + } + ], + "hyperloglog": 4969823023 + }, + { + "name": "OL_D_ID", + "most_common_value": [ + { + "frequency": 499905256, + "value": 8, + "type": "Int32" + } + ], + "hyperloglog": 10 + }, + { + "name": "OL_I_ID", + "most_common_value": [ + { + "frequency": 260692, + "value": 32487, + "type": "Int32" + } + ], + "hyperloglog": 98520 + }, + { + "name": "OL_NUMBER", + "most_common_value": [ + { + "frequency": 499935520, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 15 + }, + { + "name": "OL_O_ID", + "most_common_value": [ + { + "frequency": 332407, + "value": 505, + "type": "Int32" + } + ], + "hyperloglog": 3166 + }, + { + "name": "OL_QUANTITY", + "most_common_value": [ + { + "frequency": 4819789043, + "value": 5, + "type": "Double" + } + ], + "hyperloglog": 10 + }, + { + "name": "OL_SUPPLY_W_ID", + "most_common_value": [ + { + "frequency": 315544, + "value": 8740, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "OL_W_ID", + "most_common_value": [ + { + "frequency": 315679, + "value": 8740, + "type": "Int32" + } + ], + "hyperloglog": 15871 + } + ] + }, + "/Root/test/tpcc/stock": { + "byte_size": 671217489018, + "n_rows": 1709535853, + "n_attrs": 17, + "key_columns": [ + "S_W_ID", + "S_I_ID" + ], + "columns": [ + { + "name": "S_DATA", + "most_common_value": [ + { + "frequency": 1, + "value": "ewwmspkramcqkhjoxdvpbnucibhbbmzsnfwbmlxwvvtiaaizl", + "type": "Utf8" + } + ], + "hyperloglog": 1601496867 + }, + { + "name": "S_DIST_01", + "most_common_value": [ + { + "frequency": 1, + "value": "zwuoyreziuyfqanfepbyutn", + "type": "Utf8" + } + ], + "hyperloglog": 1619141249 + }, + { + "name": "S_DIST_02", + "most_common_value": [ + { + "frequency": 1, + "value": "cftgvsxxfatlhioqkjdcifz", + "type": "Utf8" + } + ], + "hyperloglog": 1605043436 + }, + { + "name": "S_DIST_03", + "most_common_value": [ + { + "frequency": 1, + "value": "kqysaivijxddogkugeppkuw", + "type": "Utf8" + } + ], + "hyperloglog": 1593035873 + }, + { + "name": "S_DIST_04", + "most_common_value": [ + { + "frequency": 1, + "value": "jhdriqzhdqsxesmefzscmcs", + "type": "Utf8" + } + ], + "hyperloglog": 1614620856 + }, + { + "name": "S_DIST_05", + "most_common_value": [ + { + "frequency": 1, + "value": "bsvfsdoshzqbqpamjhuugsd", + "type": "Utf8" + } + ], + "hyperloglog": 1608127910 + }, + { + "name": "S_DIST_06", + "most_common_value": [ + { + "frequency": 1, + "value": "liommwinzjcuhqgqrxjuqch", + "type": "Utf8" + } + ], + "hyperloglog": 1608998551 + }, + { + "name": "S_DIST_07", + "most_common_value": [ + { + "frequency": 1, + "value": "hrzncabpbzsllvwhpxcangt", + "type": "Utf8" + } + ], + "hyperloglog": 1595808444 + }, + { + "name": "S_DIST_08", + "most_common_value": [ + { + "frequency": 1, + "value": "jjanqwevafndsidydpylvlg", + "type": "Utf8" + } + ], + "hyperloglog": 1603549619 + }, + { + "name": "S_DIST_09", + "most_common_value": [ + { + "frequency": 1, + "value": "opmoxfjaqcaapumfpalsycu", + "type": "Utf8" + } + ], + "hyperloglog": 1596371895 + }, + { + "name": "S_DIST_10", + "most_common_value": [ + { + "frequency": 1, + "value": "eegmvopbvshfxchhpaxpalk", + "type": "Utf8" + } + ], + "hyperloglog": 1587912389 + }, + { + "name": "S_I_ID", + "most_common_value": [ + { + "frequency": 16000, + "value": 1, + "type": "Int32" + } + ], + "hyperloglog": 98520 + }, + { + "name": "S_ORDER_CNT", + "most_common_value": [ + { + "frequency": 1473756452, + "value": 0, + "type": "Int32" + } + ], + "hyperloglog": 2 + }, + { + "name": "S_QUANTITY", + "most_common_value": [ + { + "frequency": 17595758, + "value": 96, + "type": "Int32" + } + ], + "hyperloglog": 91 + }, + { + "name": "S_REMOTE_CNT", + "most_common_value": [ + { + "frequency": 1598736389, + "value": 0, + "type": "Int32" + } + ], + "hyperloglog": 2 + }, + { + "name": "S_W_ID", + "most_common_value": [ + { + "frequency": 100000, + "value": 721, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "S_YTD", + "most_common_value": [ + { + "frequency": 12632304, + "value": 7, + "type": "Double" + } + ], + "hyperloglog": 10 + } + ] + }, + "/Root/test/tpcc/warehouse": { + "byte_size": 2527068, + "n_rows": 16000, + "n_attrs": 9, + "key_columns": [ + "W_ID" + ], + "columns": [ + { + "name": "W_CITY", + "most_common_value": [ + { + "frequency": 1, + "value": "syzobegklbrsy", + "type": "Utf8" + } + ], + "hyperloglog": 16131 + }, + { + "name": "W_ID", + "most_common_value": [ + { + "frequency": 1, + "value": 13601, + "type": "Int32" + } + ], + "hyperloglog": 15871 + }, + { + "name": "W_NAME", + "most_common_value": [ + { + "frequency": 1, + "value": "exucl", + "type": "Utf8" + } + ], + "hyperloglog": 15903 + }, + { + "name": "W_STATE", + "most_common_value": [ + { + "frequency": 26, + "value": "GR", + "type": "Utf8" + } + ], + "hyperloglog": 676 + }, + { + "name": "W_STREET_1", + "most_common_value": [ + { + "frequency": 1, + "value": "yvowutfetbmznmfpo", + "type": "Utf8" + } + ], + "hyperloglog": 16226 + }, + { + "name": "W_STREET_2", + "most_common_value": [ + { + "frequency": 1, + "value": "wsdlnkymvefrv", + "type": "Utf8" + } + ], + "hyperloglog": 15987 + }, + { + "name": "W_TAX", + "most_common_value": [ + { + "frequency": 11, + "value": 0.0868, + "type": "Double" + } + ], + "hyperloglog": 2000 + }, + { + "name": "W_YTD", + "most_common_value": [ + { + "frequency": 1, + "value": 3195732.239999999, + "type": "Double" + } + ], + "hyperloglog": 16124 + }, + { + "name": "W_ZIP", + "most_common_value": [ + { + "frequency": 16000, + "value": "123456789", + "type": "Utf8" + } + ], + "hyperloglog": 1 + } + ] + } +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp index 44657d1b7e7a..77d57ca96b1c 100644 --- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp @@ -45,6 +45,9 @@ static void CreateSampleTable(TSession session) { UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/tpch.sql")).GetValueSync().IsSuccess()); UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/tpcds.sql")).GetValueSync().IsSuccess()); + + UNIT_ASSERT(session.ExecuteSchemeQuery(GetStatic("schema/tpcc.sql")).GetValueSync().IsSuccess()); + } static TKikimrRunner GetKikimrWithJoinSettings(bool useStreamLookupJoin = false, TString stats = ""){ @@ -328,6 +331,12 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) { "queries/tpcds78.sql", "stats/tpcds1000s.json", "join_order/tpcds78_1000s.json", StreamLookupJoin ); } + + Y_UNIT_TEST(TPCC) { + JoinOrderTestWithOverridenStats( + "queries/tpcc.sql", "stats/tpcc.json", "join_order/tpcc.json", false); + } + } } }