diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ea96c0c6..fdec31a0a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -334,6 +334,9 @@ set(P3_FILES "src/include/execution/executors/seq_scan_executor.h" "src/include/execution/executors/external_merge_sort_executor.h" "src/include/execution/executors/update_executor.h" + "src/include/execution/plans/aggregation_plan.h" + "src/include/storage/page/intermediate_result_page.h" + "src/include/execution/executors/window_function_executor.h" "src/execution/aggregation_executor.cpp" "src/execution/delete_executor.cpp" "src/execution/filter_executor.cpp" @@ -346,6 +349,7 @@ set(P3_FILES "src/execution/seq_scan_executor.cpp" "src/execution/external_merge_sort_executor.cpp" "src/execution/update_executor.cpp" + "src/execution/window_function_executor.cpp" "src/include/execution/execution_common.h" "src/include/optimizer/optimizer.h" "src/include/optimizer/optimizer_internal.h" diff --git a/src/binder/bind_select.cpp b/src/binder/bind_select.cpp index 9d2848156..d693d1a99 100644 --- a/src/binder/bind_select.cpp +++ b/src/binder/bind_select.cpp @@ -945,8 +945,20 @@ auto Binder::BindSort(duckdb_libpgquery::PGList *list) -> std::vectorsortby_nulls == duckdb_libpgquery::PG_SORTBY_NULLS_DEFAULT) { + null_order = OrderByNullType::DEFAULT; + } else if (sort->sortby_nulls == duckdb_libpgquery::PG_SORTBY_NULLS_FIRST) { + null_order = OrderByNullType::NULLS_FIRST; + } else if (sort->sortby_nulls == duckdb_libpgquery::PG_SORTBY_NULLS_LAST) { + null_order = OrderByNullType::NULLS_LAST; + } else { + throw NotImplementedException("unimplemented nulls order type"); + } + auto order_expression = BindExpression(target); - order_by.emplace_back(std::make_unique(type, std::move(order_expression))); + order_by.emplace_back(std::make_unique(type, null_order, std::move(order_expression))); } else { throw NotImplementedException("unsupported order by node"); } diff --git a/src/execution/aggregation_executor.cpp 
b/src/execution/aggregation_executor.cpp index 575b5d1c9..512ab55f5 100644 --- a/src/execution/aggregation_executor.cpp +++ b/src/execution/aggregation_executor.cpp @@ -33,13 +33,17 @@ AggregationExecutor::AggregationExecutor(ExecutorContext *exec_ctx, const Aggreg void AggregationExecutor::Init() { UNIMPLEMENTED("TODO(P3): Add implementation."); } /** - * Yield the next tuple from the insert. - * @param[out] tuple The next tuple produced by the aggregation - * @param[out] rid The next tuple RID produced by the aggregation - * @return `true` if a tuple was produced, `false` if there are no more tuples + * Yield the next tuple batch from the aggregation. + * @param[out] tuple_batch The next batch of tuples produced by the aggregation + * @param[out] rid_batch The next batch of tuple RIDs produced by the aggregation + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) + * @return `true` if any tuples were produced, `false` if there are no more tuples */ -auto AggregationExecutor::Next(Tuple *tuple, RID *rid) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } +auto AggregationExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { + UNIMPLEMENTED("TODO(P3): Add implementation."); +} /** Do not use or remove this function; otherwise, you will get zero points. */ auto AggregationExecutor::GetChildExecutor() const -> const AbstractExecutor * { return child_executor_.get(); } diff --git a/src/execution/delete_executor.cpp b/src/execution/delete_executor.cpp index 663ad7bfe..d95324499 100644 --- a/src/execution/delete_executor.cpp +++ b/src/execution/delete_executor.cpp @@ -34,14 +34,16 @@ void DeleteExecutor::Init() { UNIMPLEMENTED("TODO(P3): Add implementation."); } /** * Yield the number of rows deleted from the table. 
- * @param[out] tuple The integer tuple indicating the number of rows deleted from the table - * @param[out] rid The next tuple RID produced by the delete (ignore, not used) + * @param[out] tuple_batch The tuple batch with one integer indicating the number of rows deleted from the table + * @param[out] rid_batch The next tuple RID batch produced by the delete (ignore, not used) + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples * - * NOTE: DeleteExecutor::Next() does not use the `rid` out-parameter. + * NOTE: DeleteExecutor::Next() does not use the `rid_batch` out-parameter. * NOTE: DeleteExecutor::Next() returns true with the number of deleted rows produced only once. */ -auto DeleteExecutor::Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool { +auto DeleteExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } diff --git a/src/execution/external_merge_sort_executor.cpp b/src/execution/external_merge_sort_executor.cpp index 687b20e1f..1fb87165d 100644 --- a/src/execution/external_merge_sort_executor.cpp +++ b/src/execution/external_merge_sort_executor.cpp @@ -31,13 +31,15 @@ void ExternalMergeSortExecutor::Init() { } /** - * Yield the next tuple from the external merge sort. - * @param[out] tuple The next tuple produced by the external merge sort. - * @param[out] rid The next tuple RID produced by the external merge sort. + * Yield the next tuple batch from the external merge sort. + * @param[out] tuple_batch The next tuple batch produced by the external merge sort. + * @param[out] rid_batch The next tuple RID batch produced by the external merge sort. 
+ * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ template -auto ExternalMergeSortExecutor::Next(Tuple *tuple, RID *rid) -> bool { +auto ExternalMergeSortExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } diff --git a/src/execution/filter_executor.cpp b/src/execution/filter_executor.cpp index 6ff2071f2..c17a17fc3 100644 --- a/src/execution/filter_executor.cpp +++ b/src/execution/filter_executor.cpp @@ -33,26 +33,70 @@ void FilterExecutor::Init() { } /** - * Yield the next tuple from the filter. - * @param[out] tuple The next tuple produced by the filter - * @param[out] rid The next tuple RID produced by the filter + * Yield the next tuple batch from the filter. + * @param[out] tuple_batch The next tuple batch produced by the filter + * @param[out] rid_batch The next tuple RID batch produced by the filter + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto FilterExecutor::Next(Tuple *tuple, RID *rid) -> bool { +auto FilterExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { + tuple_batch->clear(); + rid_batch->clear(); + auto filter_expr = plan_->GetPredicate(); while (true) { - // Get the next tuple - const auto status = child_executor_->Next(tuple, rid); + // If the child offset is not zero, process remaining tuples in the last fetched batch + if (child_offset_ != 0) { + for (size_t i = child_offset_; i < child_tuples_.size(); ++i) { + auto &tuple = child_tuples_[i]; + auto &rid = child_rids_[i]; + // Evaluate the filter predicate + auto value = filter_expr->Evaluate(&tuple, child_executor_->GetOutputSchema()); + if (filter_expr == nullptr || (!value.IsNull() && 
value.GetAs())) { + tuple_batch->push_back(tuple); + rid_batch->push_back(rid); + } + } + } + + child_offset_ = 0; + + // Get the next tuple batch from the child executor + const auto status = child_executor_->Next(&child_tuples_, &child_rids_, batch_size); - if (!status) { + // If no more tuples and output batch is empty, return false + if (!status && tuple_batch->empty()) { return false; } - auto value = filter_expr->Evaluate(tuple, child_executor_->GetOutputSchema()); - if (!value.IsNull() && value.GetAs()) { + // If no more tuples but output batch is not empty, return true + if (!status && !tuple_batch->empty()) { return true; } + + for (size_t i = 0; i < child_tuples_.size(); ++i) { + auto &tuple = child_tuples_[i]; + auto &rid = child_rids_[i]; + // Evaluate the filter predicate + auto value = filter_expr->Evaluate(&tuple, child_executor_->GetOutputSchema()); + if (filter_expr == nullptr || (!value.IsNull() && value.GetAs())) { + tuple_batch->push_back(tuple); + rid_batch->push_back(rid); + if (tuple_batch->size() >= batch_size) { + // If we have filled the output batch but not yet reached the end of the current child batch, update the + // offset and return + if (i + 1 < child_tuples_.size()) { + child_offset_ = i + 1; + } else { + child_offset_ = 0; + } + + return true; + } + } + } } } diff --git a/src/execution/hash_join_executor.cpp b/src/execution/hash_join_executor.cpp index 46178329e..c10cbf405 100644 --- a/src/execution/hash_join_executor.cpp +++ b/src/execution/hash_join_executor.cpp @@ -37,11 +37,15 @@ HashJoinExecutor::HashJoinExecutor(ExecutorContext *exec_ctx, const HashJoinPlan void HashJoinExecutor::Init() { UNIMPLEMENTED("TODO(P3): Add implementation."); } /** - * Yield the next tuple from the join. - * @param[out] tuple The next tuple produced by the join. - * @param[out] rid The next tuple RID, not used by hash join. - * @return `true` if a tuple was produced, `false` if there are no more tuples. 
+ * Yield the next tuple batch from the hash join. + * @param[out] tuple_batch The next tuple batch produced by the hash join + * @param[out] rid_batch The next tuple RID batch produced by the hash join + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) + * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto HashJoinExecutor::Next(Tuple *tuple, RID *rid) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } +auto HashJoinExecutor::Next(std::vector<Tuple> *tuple_batch, std::vector<RID> *rid_batch, + size_t batch_size) -> bool { + UNIMPLEMENTED("TODO(P3): Add implementation."); +} } // namespace bustub diff --git a/src/execution/index_scan_executor.cpp b/src/execution/index_scan_executor.cpp index 11aeeb435..1a722a23a 100644 --- a/src/execution/index_scan_executor.cpp +++ b/src/execution/index_scan_executor.cpp @@ -27,6 +27,9 @@ IndexScanExecutor::IndexScanExecutor(ExecutorContext *exec_ctx, const IndexScanP void IndexScanExecutor::Init() { UNIMPLEMENTED("TODO(P3): Add implementation."); } -auto IndexScanExecutor::Next(Tuple *tuple, RID *rid) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } +auto IndexScanExecutor::Next(std::vector<Tuple> *tuple_batch, std::vector<RID> *rid_batch, + size_t batch_size) -> bool { + UNIMPLEMENTED("TODO(P3): Add implementation."); +} } // namespace bustub diff --git a/src/execution/init_check_executor.cpp b/src/execution/init_check_executor.cpp index f6e454633..fbbb8b882 100644 --- a/src/execution/init_check_executor.cpp +++ b/src/execution/init_check_executor.cpp @@ -36,14 +36,19 @@ void InitCheckExecutor::Init() { } /** - * Yield the next tuple from the child executor. - * @param[out] tuple The next tuple produced by the child executor - * @param[out] rid The next tuple RID produced by the child executor + * Yield the next tuple batch from the child executor.
+ * @param[out] tuple_batch The next tuple batch produced by the child executor + * @param[out] rid_batch The next tuple RID batch produced by the child executor + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto InitCheckExecutor::Next(Tuple *tuple, RID *rid) -> bool { +auto InitCheckExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { + tuple_batch->clear(); + rid_batch->clear(); + // Emit the next tuple - auto result = child_executor_->Next(tuple, rid); + auto result = child_executor_->Next(tuple_batch, rid_batch, batch_size); if (result) { n_next_++; } diff --git a/src/execution/insert_executor.cpp b/src/execution/insert_executor.cpp index 72c0f09a7..0b38e61e9 100644 --- a/src/execution/insert_executor.cpp +++ b/src/execution/insert_executor.cpp @@ -34,14 +34,16 @@ void InsertExecutor::Init() { UNIMPLEMENTED("TODO(P3): Add implementation."); } /** * Yield the number of rows inserted into the table. - * @param[out] tuple The integer tuple indicating the number of rows inserted into the table - * @param[out] rid The next tuple RID produced by the insert (ignore, not used) + * @param[out] tuple_batch The tuple batch with one integer indicating the number of rows inserted into the table + * @param[out] rid_batch The next tuple RID batch produced by the insert (ignore, not used) + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples * - * NOTE: InsertExecutor::Next() does not use the `rid` out-parameter. - * NOTE: InsertExecutor::Next() returns true with number of inserted rows produced only once. + * NOTE: InsertExecutor::Next() does not use the `rid_batch` out-parameter. + * NOTE: InsertExecutor::Next() returns true with the number of inserted rows produced only once. 
*/ -auto InsertExecutor::Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool { +auto InsertExecutor::Next(std::vector<Tuple> *tuple_batch, std::vector<RID> *rid_batch, + size_t batch_size) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } diff --git a/src/execution/limit_executor.cpp b/src/execution/limit_executor.cpp index f9e8918f9..73494f3ad 100644 --- a/src/execution/limit_executor.cpp +++ b/src/execution/limit_executor.cpp @@ -31,11 +31,15 @@ LimitExecutor::LimitExecutor(ExecutorContext *exec_ctx, const LimitPlanNode *pla void LimitExecutor::Init() { UNIMPLEMENTED("TODO(P3): Add implementation."); } /** - * Yield the next tuple from the limit. - * @param[out] tuple The next tuple produced by the limit - * @param[out] rid The next tuple RID produced by the limit + * Yield the next tuple batch from the limit. + * @param[out] tuple_batch The next tuple batch produced by the limit + * @param[out] rid_batch The next tuple RID batch produced by the limit + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) + * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto LimitExecutor::Next(Tuple *tuple, RID *rid) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } +auto LimitExecutor::Next(std::vector<Tuple> *tuple_batch, std::vector<RID> *rid_batch, + size_t batch_size) -> bool { + UNIMPLEMENTED("TODO(P3): Add implementation."); +} } // namespace bustub diff --git a/src/execution/mock_scan_executor.cpp b/src/execution/mock_scan_executor.cpp index bdd8beb8c..39f5e2ae3 100644 --- a/src/execution/mock_scan_executor.cpp +++ b/src/execution/mock_scan_executor.cpp @@ -21,6 +21,65 @@ namespace bustub { +inline auto MockRandomValuesEnabled() -> bool { + if (const char *v = std::getenv("BUSTUB_ENABLE_RANDOM"); (v != nullptr) && (*v != 0)) { + return std::string_view(v) != "0"; + } + return false; +} + +inline auto MockSeed() -> uint64_t { + if (const char *s = std::getenv("BUSTUB_MOCKSCAN_SEED"); (s !=
nullptr) && (*s != 0)) { + try { + return std::stoull(s); + } catch (...) { + } + } + if (const char *s = std::getenv("BUSTUB_TABLEGEN_SEED"); (s != nullptr) && (*s != 0)) { + try { + return std::stoull(s); + } catch (...) { + } + } + std::random_device rd; + return (static_cast(rd()) << 32) ^ static_cast(rd()); +} + +// Per-table RNG: deterministic within a run, different across runs/seeds +inline auto MakeTableRng(const std::string &table) -> std::mt19937_64 { + const uint64_t h = std::hash{}(table); + const uint64_t seed = MockSeed() ^ (h * 0x9E3779B97F4A7C15ULL); + std::seed_seq seq{static_cast(seed), static_cast(seed >> 32), static_cast(h), + static_cast(h >> 32)}; + return std::mt19937_64(seq); +} + +// Generic random value for a column (keeps types/sensible ranges) +inline auto RandomValueForColumn(const Column &col, std::mt19937_64 &rng) -> Value { + switch (col.GetType()) { + case TypeId::INTEGER: { + // Keep modest range so joins can still occasionally match + std::uniform_int_distribution d(0, 2000000); + return ValueFactory::GetIntegerValue(d(rng)); + } + case TypeId::VARCHAR: { + // Length cap respects column length + const uint32_t max_len = std::min(col.GetStorageSize(), 32); + std::uniform_int_distribution len_d(0, static_cast(max_len)); + const int len = len_d(rng); + std::uniform_int_distribution ch_d(33, 126); // printable ASCII + std::string s; + s.reserve(len); + for (int i = 0; i < len; i++) { + s.push_back(static_cast(ch_d(rng))); + } + return ValueFactory::GetVarcharValue(s); + } + default: + return ValueFactory::GetZeroValueByType(col.GetType()); + } +} + static const char *ta_list_2022[] = {"amstqq", "durovo", "joyceliaoo", "karthik-ramanathan-3006", "kush789", "lmwnshn", "mkpjnx", "skyzh", "thepinetree", "timlee0119", "yliang412"}; @@ -41,6 +100,9 @@ static const char *ta_list_2024_fall[] = {"17zhangw", "connortsui20", "J static const char *ta_list_2025_spring[] = {"AlSchlo", "carpecodeum", "ChrisLaspias", "hyoungjook", "joesunil123", 
"mrwhitezz", "rmboyce", "yliang412"}; +static const char *ta_list_2025_fall[] = {"17zhangw", "quantumish", "songwdfu", "notSaranshMalik", + "shinyumh", "s-wangru", "rayhhome", "MrWhitezz"}; + static const char *ta_oh_2022[] = {"Tuesday", "Wednesday", "Monday", "Wednesday", "Thursday", "Friday", "Wednesday", "Randomly", "Tuesday", "Monday", "Tuesday"}; @@ -59,21 +121,25 @@ static const char *ta_oh_2024_fall[] = {"Wednesday", "Thursday", "Tuesday", "Mon static const char *ta_oh_2025_spring[] = {"Friday", "Monday", "Wednesday", "Tuesday", "Friday", "Thursday", "Monday", "Tuesday"}; +static const char *ta_oh_2025_fall[] = {"Tuesday", "Monday", "Thursday", "Friday", + "Tuesday", "Tuesday", "Friday", "Wednesday"}; + static const char *course_on_date[] = {"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"}; -const char *mock_table_list[] = { - "__mock_table_1", "__mock_table_2", "__mock_table_3", "__mock_table_tas_2022", "__mock_table_tas_2023", - "__mock_table_tas_2023_fall", "__mock_table_tas_2024", "__mock_table_tas_2024_fall", "__mock_table_tas_2025_spring", - "__mock_agg_input_small", "__mock_agg_input_big", "__mock_external_merge_sort_input", "__mock_table_schedule_2022", - "__mock_table_schedule", "__mock_table_123", "__mock_graph", - // For leaderboard Q1 - "__mock_t1", - // For leaderboard Q2 - "__mock_t4_1m", "__mock_t5_1m", "__mock_t6_1m", - // For leaderboard Q3 - "__mock_t7", "__mock_t8", "__mock_t9", - // For P3 leaderboard Q4 - "__mock_t10", "__mock_t11", nullptr}; +const char *mock_table_list[] = {"__mock_table_1", "__mock_table_2", "__mock_table_3", "__mock_table_4", + "__mock_table_tas_2022", "__mock_table_tas_2023", "__mock_table_tas_2023_fall", + "__mock_table_tas_2024", "__mock_table_tas_2024_fall", "__mock_table_tas_2025_spring", + "__mock_table_tas_2025_fall", "__mock_agg_input_small", "__mock_agg_input_big", + "__mock_external_merge_sort_input", "__mock_table_schedule_2022", + "__mock_table_schedule", "__mock_table_123", 
"__mock_graph", + // For leaderboard Q1 + "__mock_t1", + // For leaderboard Q2 + "__mock_t4_1m", "__mock_t5_1m", "__mock_t6_1m", + // For leaderboard Q3 + "__mock_t7", "__mock_t8", "__mock_t9", + // For P3 leaderboard Q4 + "__mock_t10", "__mock_t11", nullptr}; static const int GRAPH_NODE_CNT = 10; @@ -90,6 +156,10 @@ auto GetMockTableSchemaOf(const std::string &table) -> Schema { return Schema{std::vector{Column{"colE", TypeId::INTEGER}, {Column{"colF", TypeId::VARCHAR, 128}}}}; } + if (table == "__mock_table_4") { + return Schema{std::vector{Column{"colG", TypeId::INTEGER}, {Column{"colH", TypeId::VARCHAR, 128}}}}; + } + if (table == "__mock_table_tas_2022") { return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}}; } @@ -114,6 +184,10 @@ auto GetMockTableSchemaOf(const std::string &table) -> Schema { return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}}; } + if (table == "__mock_table_tas_2025_fall") { + return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}}; + } + if (table == "__mock_table_schedule_2022") { return Schema{std::vector{Column{"day_of_week", TypeId::VARCHAR, 128}, Column{"has_lecture", TypeId::INTEGER}}}; } @@ -194,6 +268,10 @@ auto GetSizeOf(const MockScanPlanNode *plan) -> size_t { return 100; } + if (table == "__mock_table_4") { + return 100; + } + if (table == "__mock_table_tas_2022") { return sizeof(ta_list_2022) / sizeof(ta_list_2022[0]); } @@ -218,6 +296,10 @@ auto GetSizeOf(const MockScanPlanNode *plan) -> size_t { return sizeof(ta_list_2025_spring) / sizeof(ta_list_2025_spring[0]); } + if (table == "__mock_table_tas_2025_fall") { + return sizeof(ta_list_2025_fall) / sizeof(ta_list_2025_fall[0]); + } + if (table == "__mock_table_schedule_2022") { return sizeof(course_on_date) / sizeof(course_on_date[0]); } @@ -297,42 +379,6 @@ auto GetShuffled(const 
MockScanPlanNode *plan) -> bool { auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function { const auto &table = plan->GetTable(); - - if (table == "__mock_table_1") { - return [plan](size_t cursor) { - std::vector values{}; - values.reserve(2); - values.push_back(ValueFactory::GetIntegerValue(cursor)); - values.push_back(ValueFactory::GetIntegerValue(cursor * 100)); - return Tuple{values, &plan->OutputSchema()}; - }; - } - - if (table == "__mock_table_2") { - return [plan](size_t cursor) { - std::vector values{}; - values.reserve(2); - values.push_back(ValueFactory::GetVarcharValue(fmt::format("{}-\U0001F4A9", cursor))); // the poop emoji - values.push_back( - ValueFactory::GetVarcharValue(StringUtil::Repeat("\U0001F607", cursor % 8))); // the innocent emoji - return Tuple{values, &plan->OutputSchema()}; - }; - } - - if (table == "__mock_table_3") { - return [plan](size_t cursor) { - std::vector values{}; - values.reserve(2); - if (cursor % 2 == 0) { - values.push_back(ValueFactory::GetIntegerValue(cursor)); - } else { - values.push_back(ValueFactory::GetNullValueByType(TypeId::INTEGER)); - } - values.push_back(ValueFactory::GetVarcharValue(fmt::format("{}-\U0001F4A9", cursor))); // the poop emoji - return Tuple{values, &plan->OutputSchema()}; - }; - } - if (table == "__mock_table_tas_2022") { return [plan](size_t cursor) { std::vector values{}; @@ -387,6 +433,15 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function }; } + if (table == "__mock_table_tas_2025_fall") { + return [plan](size_t cursor) { + std::vector values{}; + values.push_back(ValueFactory::GetVarcharValue(ta_list_2025_fall[cursor])); + values.push_back(ValueFactory::GetVarcharValue(ta_oh_2025_fall[cursor])); + return Tuple{values, &plan->OutputSchema()}; + }; + } + if (table == "__mock_table_schedule_2022") { return [plan](size_t cursor) { std::vector values{}; @@ -405,6 +460,65 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function }; } + if (table == 
"__mock_table_1") { + return [plan](size_t cursor) { + std::vector values{}; + values.reserve(2); + values.push_back(ValueFactory::GetIntegerValue(cursor)); + values.push_back(ValueFactory::GetIntegerValue(cursor * 100)); + return Tuple{values, &plan->OutputSchema()}; + }; + } + + if (table == "__mock_table_2") { + return [plan](size_t cursor) { + std::vector values{}; + values.reserve(2); + values.push_back(ValueFactory::GetVarcharValue(fmt::format("{}-\U0001F4A9", cursor))); // the poop emoji + values.push_back( + ValueFactory::GetVarcharValue(StringUtil::Repeat("\U0001F607", cursor % 8))); // the innocent emoji + return Tuple{values, &plan->OutputSchema()}; + }; + } + + if (table == "__mock_table_3") { + return [plan](size_t cursor) { + std::vector values{}; + values.reserve(2); + if (cursor % 2 == 0) { + values.push_back(ValueFactory::GetIntegerValue(cursor)); + } else { + values.push_back(ValueFactory::GetNullValueByType(TypeId::INTEGER)); + } + values.push_back(ValueFactory::GetVarcharValue(fmt::format("{}-\U0001F4A9", cursor))); // the poop emoji + return Tuple{values, &plan->OutputSchema()}; + }; + } + + if (table == "__mock_table_4") { + return [plan](size_t cursor) { + std::vector values{}; + values.reserve(2); + if (cursor % 5 != 0) { + values.push_back(ValueFactory::GetIntegerValue(cursor % 5)); + } else { + values.push_back(ValueFactory::GetNullValueByType(TypeId::INTEGER)); + } + + std::string str = "\U0001F4A9"; // the poop emoji + for (size_t i = 0; i < (cursor % 3); i++) { + str += "\U0001F4A9"; + } + if (cursor % 10 != 0) { + values.push_back(ValueFactory::GetVarcharValue(str)); + } else { + values.push_back(ValueFactory::GetNullValueByType(TypeId::VARCHAR)); + } + + return Tuple{values, &plan->OutputSchema()}; + }; + } + if (table == "__mock_agg_input_small") { return [plan](size_t cursor) { std::vector values{}; @@ -480,6 +594,46 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function }; } + if (table == "__mock_t8") { + return 
[plan](size_t cursor) { + std::vector values{}; + values.push_back(ValueFactory::GetIntegerValue(cursor)); + return Tuple{values, &plan->OutputSchema()}; + }; + } + + if (MockRandomValuesEnabled() && table == "__mock_t7") { + auto rng = MakeTableRng(table); + const Schema *schema = &plan->OutputSchema(); + + constexpr int32_t k_groups = 100; + constexpr int32_t k_val_domain = 1000000; + + return [schema, rng](size_t /*cursor*/) mutable { + auto v = static_cast(rng() % k_groups); + auto v1 = static_cast(rng() % k_val_domain); + auto v2 = static_cast(rng() % k_val_domain); + std::vector values; + values.emplace_back(ValueFactory::GetIntegerValue(v)); + values.emplace_back(ValueFactory::GetIntegerValue(v1)); + values.emplace_back(ValueFactory::GetIntegerValue(v2)); + return Tuple{values, schema}; + }; + } + + if (MockRandomValuesEnabled()) { + auto rng = MakeTableRng(table); + const Schema *schema = &plan->OutputSchema(); + return [schema, rng](size_t /*cursor*/) mutable { + std::vector values; + values.reserve(schema->GetColumnCount()); + for (const auto &col : schema->GetColumns()) { + values.emplace_back(RandomValueForColumn(col, rng)); + } + return Tuple{values, schema}; + }; + } + if (table == "__mock_t4_1m") { return [plan](size_t cursor) { std::vector values{}; @@ -520,14 +674,6 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function }; } - if (table == "__mock_t8") { - return [plan](size_t cursor) { - std::vector values{}; - values.push_back(ValueFactory::GetIntegerValue(cursor)); - return Tuple{values, &plan->OutputSchema()}; - }; - } - if (table == "__mock_t9") { return [plan](size_t cursor) { std::vector values{}; @@ -591,24 +737,33 @@ void MockScanExecutor::Init() { } /** - * Yield the next tuple from the sequential scan. - * @param[out] tuple The next tuple produced by the scan - * @param[out] rid The next tuple RID produced by the scan + * Yield the next tuple batch from the scan. 
+ * @param[out] tuple_batch The next tuple batch produced by the scan + * @param[out] rid_batch The next tuple RID batch produced by the scan + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto MockScanExecutor::Next(Tuple *tuple, RID *rid) -> bool { - if (cursor_ == size_) { - // Scan complete - return EXECUTOR_EXHAUSTED; - } - if (shuffled_idx_.empty()) { - *tuple = func_(cursor_); - } else { - *tuple = func_(shuffled_idx_[cursor_]); - } - ++cursor_; - *rid = MakeDummyRID(); - return EXECUTOR_ACTIVE; +auto MockScanExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { + tuple_batch->clear(); + rid_batch->clear(); + + while (tuple_batch->size() < batch_size && cursor_ < size_) { + Tuple tuple{}; + RID rid{}; + if (shuffled_idx_.empty()) { + tuple = func_(cursor_); + } else { + tuple = func_(shuffled_idx_[cursor_]); + } + ++cursor_; + rid = MakeDummyRID(); + + tuple_batch->push_back(tuple); + rid_batch->push_back(rid); + } + + return !tuple_batch->empty(); } /** @return A dummy RID value */ diff --git a/src/execution/nested_index_join_executor.cpp b/src/execution/nested_index_join_executor.cpp index 36f49a257..a0e1abb0e 100644 --- a/src/execution/nested_index_join_executor.cpp +++ b/src/execution/nested_index_join_executor.cpp @@ -33,6 +33,9 @@ NestedIndexJoinExecutor::NestedIndexJoinExecutor(ExecutorContext *exec_ctx, cons void NestedIndexJoinExecutor::Init() { UNIMPLEMENTED("TODO(P3): Add implementation."); } -auto NestedIndexJoinExecutor::Next(Tuple *tuple, RID *rid) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } +auto NestedIndexJoinExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { + UNIMPLEMENTED("TODO(P3): Add implementation."); +} } // namespace bustub diff --git a/src/execution/nested_loop_join_executor.cpp 
b/src/execution/nested_loop_join_executor.cpp index b97a9b2a9..9eb6d2964 100644 --- a/src/execution/nested_loop_join_executor.cpp +++ b/src/execution/nested_loop_join_executor.cpp @@ -39,11 +39,15 @@ NestedLoopJoinExecutor::NestedLoopJoinExecutor(ExecutorContext *exec_ctx, const void NestedLoopJoinExecutor::Init() { UNIMPLEMENTED("TODO(P3): Add implementation."); } /** - * Yield the next tuple from the join. - * @param[out] tuple The next tuple produced by the join - * @param[out] rid The next tuple RID produced, not used by nested loop join. - * @return `true` if a tuple was produced, `false` if there are no more tuples. + * Yield the next tuple batch from the join. + * @param[out] tuple_batch The next tuple batch produced by the join + * @param[out] rid_batch The next tuple RID batch produced by the join + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) + * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto NestedLoopJoinExecutor::Next(Tuple *tuple, RID *rid) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } +auto NestedLoopJoinExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { + UNIMPLEMENTED("TODO(P3): Add implementation."); +} } // namespace bustub diff --git a/src/execution/projection_executor.cpp b/src/execution/projection_executor.cpp index 2764f4b9b..ef601a9f5 100644 --- a/src/execution/projection_executor.cpp +++ b/src/execution/projection_executor.cpp @@ -31,30 +31,74 @@ void ProjectionExecutor::Init() { } /** - * Yield the next tuple from the projection. - * @param[out] tuple The next tuple produced by the projection - * @param[out] rid The next tuple RID produced by the projection + * Yield the next tuple batch from the projection. 
+ * @param[out] tuple_batch The next tuple batch produced by the projection + * @param[out] rid_batch The next tuple RID batch produced by the projection + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto ProjectionExecutor::Next(Tuple *tuple, RID *rid) -> bool { - Tuple child_tuple{}; +auto ProjectionExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) -> bool { + tuple_batch->clear(); + rid_batch->clear(); - // Get the next tuple - const auto status = child_executor_->Next(&child_tuple, rid); + if (child_offset_ != 0) { + for (size_t i = child_offset_; i < child_tuples_.size(); i++) { + auto child_tuple = child_tuples_[i]; + auto child_rid = child_rids_[i]; - if (!status) { + // Compute expressions + std::vector values{}; + values.reserve(GetOutputSchema().GetColumnCount()); + for (const auto &expr : plan_->GetExpressions()) { + values.push_back(expr->Evaluate(&child_tuple, child_executor_->GetOutputSchema())); + } + + tuple_batch->push_back(Tuple{values, &GetOutputSchema()}); + rid_batch->push_back(child_rid); + } + } + + child_offset_ = 0; + + const auto status = child_executor_->Next(&child_tuples_, &child_rids_, batch_size); + + // If no more tuples and output batch is empty, return false + if (!status && tuple_batch->empty()) { return false; } - // Compute expressions - std::vector values{}; - values.reserve(GetOutputSchema().GetColumnCount()); - for (const auto &expr : plan_->GetExpressions()) { - values.push_back(expr->Evaluate(&child_tuple, child_executor_->GetOutputSchema())); + // If no more tuples but output batch is not empty, return true + if (!status && !tuple_batch->empty()) { + return true; } - *tuple = Tuple{values, &GetOutputSchema()}; + for (size_t i = 0; i < child_tuples_.size(); i++) { + auto child_tuple = child_tuples_[i]; + auto child_rid = child_rids_[i]; + + // Compute 
expressions + std::vector values{}; + values.reserve(GetOutputSchema().GetColumnCount()); + for (const auto &expr : plan_->GetExpressions()) { + values.push_back(expr->Evaluate(&child_tuple, child_executor_->GetOutputSchema())); + } + + tuple_batch->push_back(Tuple{values, &GetOutputSchema()}); + rid_batch->push_back(child_rid); + + if (tuple_batch->size() >= batch_size) { + // If we have filled the output batch but not yet reached the end of the current child batch, update the offset + // and return + if (i + 1 < child_tuples_.size()) { + child_offset_ = i + 1; + } else { + child_offset_ = 0; + } + + return true; + } + } - return true; + return !tuple_batch->empty(); } } // namespace bustub diff --git a/src/execution/seq_scan_executor.cpp b/src/execution/seq_scan_executor.cpp index 7c715b778..8bc9ee50a 100644 --- a/src/execution/seq_scan_executor.cpp +++ b/src/execution/seq_scan_executor.cpp @@ -28,11 +28,15 @@ SeqScanExecutor::SeqScanExecutor(ExecutorContext *exec_ctx, const SeqScanPlanNod void SeqScanExecutor::Init() { UNIMPLEMENTED("TODO(P3): Add implementation."); } /** - * Yield the next tuple from the sequential scan. - * @param[out] tuple The next tuple produced by the scan - * @param[out] rid The next tuple RID produced by the scan + * Yield the next tuple batch from the seq scan. 
+ * @param[out] tuple_batch The next tuple batch produced by the scan + * @param[out] rid_batch The next tuple RID batch produced by the scan + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto SeqScanExecutor::Next(Tuple *tuple, RID *rid) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } +auto SeqScanExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { + UNIMPLEMENTED("TODO(P3): Add implementation."); +} } // namespace bustub diff --git a/src/execution/sort_executor.cpp b/src/execution/sort_executor.cpp index b42a2b63a..e5ac2aaca 100644 --- a/src/execution/sort_executor.cpp +++ b/src/execution/sort_executor.cpp @@ -27,11 +27,15 @@ SortExecutor::SortExecutor(ExecutorContext *exec_ctx, const SortPlanNode *plan, void SortExecutor::Init() { throw NotImplementedException("SortExecutor is not implemented"); } /** - * Yield the next tuple from the sort. - * @param[out] tuple The next tuple produced by the sort - * @param[out] rid The next tuple RID produced by the sort + * Yield the next tuple batch from the sort. 
+ * @param[out] tuple_batch The next tuple batch produced by the sort + * @param[out] rid_batch The next tuple RID batch produced by the sort + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto SortExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } +auto SortExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool { + return false; +} } // namespace bustub diff --git a/src/execution/topn_check_executor.cpp b/src/execution/topn_check_executor.cpp index 4cd55ae49..46bb1f2e7 100644 --- a/src/execution/topn_check_executor.cpp +++ b/src/execution/topn_check_executor.cpp @@ -39,12 +39,16 @@ void TopNCheckExecutor::Init() { } /** - * Yield the next tuple from the child executor. - * @param[out] tuple The next tuple produced by the child executor - * @param[out] rid The next tuple RID produced by the child executor + * Yield the next tuple batch from the child executor. 
+ * @param[out] tuple_batch The next tuple batch produced by the child executor + * @param[out] rid_batch The next tuple RID batch produced by the child executor + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto TopNCheckExecutor::Next(Tuple *tuple, RID *rid) -> bool { +auto TopNCheckExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) -> bool { + tuple_batch->clear(); + rid_batch->clear(); + if (!child_executor_) { return EXECUTOR_EXHAUSTED; } @@ -55,7 +59,7 @@ auto TopNCheckExecutor::Next(Tuple *tuple, RID *rid) -> bool { } prev_ = topn_executor_->GetNumInHeap(); // Emit the next tuple - return child_executor_->Next(tuple, rid); + return child_executor_->Next(tuple_batch, rid_batch, batch_size); } } // namespace bustub diff --git a/src/execution/topn_executor.cpp b/src/execution/topn_executor.cpp index 5f1bb0bb8..6eb6b5114 100644 --- a/src/execution/topn_executor.cpp +++ b/src/execution/topn_executor.cpp @@ -27,12 +27,16 @@ TopNExecutor::TopNExecutor(ExecutorContext *exec_ctx, const TopNPlanNode *plan, void TopNExecutor::Init() { throw NotImplementedException("TopNExecutor is not implemented"); } /** - * Yield the next tuple from the TopN. - * @param[out] tuple The next tuple produced by the TopN - * @param[out] rid The next tuple RID produced by the TopN + * Yield the next tuple batch from the TopN. 
+ * @param[out] tuple_batch The next tuple batch produced by the TopN + * @param[out] rid_batch The next tuple RID batch produced by the TopN + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto TopNExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } +auto TopNExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool { + return false; +} auto TopNExecutor::GetNumInHeap() -> size_t { throw NotImplementedException("TopNExecutor is not implemented"); }; diff --git a/src/execution/topn_per_group_executor.cpp b/src/execution/topn_per_group_executor.cpp index 3daa13698..0fdb52cb3 100644 --- a/src/execution/topn_per_group_executor.cpp +++ b/src/execution/topn_per_group_executor.cpp @@ -27,11 +27,15 @@ TopNPerGroupExecutor::TopNPerGroupExecutor(ExecutorContext *exec_ctx, const TopN void TopNPerGroupExecutor::Init() { throw NotImplementedException("TopNPerGroupExecutor is not implemented"); } /** - * Yield the next tuple from the TopNPerGroup. - * @param[out] tuple The next tuple produced by the TopNPerGroup - * @param[out] rid The next tuple RID produced by the TopNPerGroup + * Yield the next tuple batch from the TopNPerGroup. 
+ * @param[out] tuple_batch The next tuple batch produced by the TopNPerGroup + * @param[out] rid_batch The next tuple RID batch produced by the TopNPerGroup + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto TopNPerGroupExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } +auto TopNPerGroupExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { + return false; +} } // namespace bustub diff --git a/src/execution/update_executor.cpp b/src/execution/update_executor.cpp index 6a0b0a724..d4b9804f9 100644 --- a/src/execution/update_executor.cpp +++ b/src/execution/update_executor.cpp @@ -33,14 +33,17 @@ UpdateExecutor::UpdateExecutor(ExecutorContext *exec_ctx, const UpdatePlanNode * void UpdateExecutor::Init() { UNIMPLEMENTED("TODO(P3): Add implementation."); } /** - * Yield the next tuple from the update. - * @param[out] tuple The next tuple produced by the update - * @param[out] rid The next tuple RID produced by the update (ignore this) + * Yield the number of rows updated in the table. + * @param[out] tuple_batch The tuple batch with one integer indicating the number of rows updated in the table + * @param[out] rid_batch The next tuple RID batch produced by the update (ignore, not used) + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples * - * NOTE: UpdateExecutor::Next() does not use the `rid` out-parameter. + * NOTE: UpdateExecutor::Next() does not use the `rid_batch` out-parameter. + * NOTE: UpdateExecutor::Next() returns true with the number of updated rows produced only once. 
*/ -auto UpdateExecutor::Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool { +auto UpdateExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { UNIMPLEMENTED("TODO(P3): Add implementation."); } diff --git a/src/execution/values_executor.cpp b/src/execution/values_executor.cpp index 64ea5326b..707928409 100644 --- a/src/execution/values_executor.cpp +++ b/src/execution/values_executor.cpp @@ -26,28 +26,31 @@ ValuesExecutor::ValuesExecutor(ExecutorContext *exec_ctx, const ValuesPlanNode * void ValuesExecutor::Init() { cursor_ = 0; } /** - * Yield the next tuple from the values. - * @param[out] tuple The next tuple produced by the values - * @param[out] rid The next tuple RID produced by the values, not used by values executor + * Yield the next tuple batch from the values. + * @param[out] tuple_batch The next tuple batch produced by the values + * @param[out] rid_batch The next tuple RID batch produced by the values + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto ValuesExecutor::Next(Tuple *tuple, RID *rid) -> bool { - if (cursor_ >= plan_->GetValues().size()) { - return false; +auto ValuesExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { + tuple_batch->clear(); + rid_batch->clear(); + + while (tuple_batch->size() < batch_size && cursor_ < plan_->GetValues().size()) { + std::vector values{}; + values.reserve(GetOutputSchema().GetColumnCount()); + + const auto &row_expr = plan_->GetValues()[cursor_]; + for (const auto &col : row_expr) { + values.push_back(col->Evaluate(nullptr, dummy_schema_)); + } + + tuple_batch->emplace_back(values, &GetOutputSchema()); + rid_batch->emplace_back(RID{}); + cursor_ += 1; } - - std::vector values{}; - values.reserve(GetOutputSchema().GetColumnCount()); - - const auto &row_expr = plan_->GetValues()[cursor_]; - 
for (const auto &col : row_expr) { - values.push_back(col->Evaluate(nullptr, dummy_schema_)); - } - - *tuple = Tuple{values, &GetOutputSchema()}; - cursor_ += 1; - - return true; + return !tuple_batch->empty(); } } // namespace bustub diff --git a/src/execution/window_function_executor.cpp b/src/execution/window_function_executor.cpp index 75897c0f3..f7714e8e7 100644 --- a/src/execution/window_function_executor.cpp +++ b/src/execution/window_function_executor.cpp @@ -29,10 +29,14 @@ WindowFunctionExecutor::WindowFunctionExecutor(ExecutorContext *exec_ctx, const void WindowFunctionExecutor::Init() { throw NotImplementedException("WindowFunctionExecutor is not implemented"); } /** - * Yield the next tuple from the window aggregation. - * @param[out] tuple The next tuple produced by the window aggregation - * @param[out] rid The next tuple RID produced by the window aggregation + * Yield the next tuple batch from the window aggregation. + * @param[out] tuple_batch The next tuple batch produced by the window aggregation + * @param[out] rid_batch The next tuple RID batch produced by the window aggregation + * @param batch_size The number of tuples to be included in the batch (default: BUSTUB_BATCH_SIZE) * @return `true` if a tuple was produced, `false` if there are no more tuples */ -auto WindowFunctionExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; } +auto WindowFunctionExecutor::Next(std::vector *tuple_batch, std::vector *rid_batch, + size_t batch_size) -> bool { + return false; +} } // namespace bustub diff --git a/src/include/binder/bound_order_by.h b/src/include/binder/bound_order_by.h index ca908b0a1..e7c0120ca 100644 --- a/src/include/binder/bound_order_by.h +++ b/src/include/binder/bound_order_by.h @@ -14,6 +14,7 @@ #include #include +#include #include #include "binder/bound_expression.h" @@ -33,24 +34,38 @@ enum class OrderByType : uint8_t { DESC = 3, /**< Descending order by type. 
*/ }; -using OrderBy = std::pair; +/** + * All types of order by nulls in binder. + */ +enum class OrderByNullType : uint8_t { + DEFAULT = 0, /**< Default null ordering. */ + NULLS_FIRST = 1, /**< Nulls-first ordering. */ + NULLS_LAST = 2, /**< Nulls-last ordering. */ +}; + +using OrderBy = std::tuple; /** * BoundOrderBy is an item in the ORDER BY clause. */ class BoundOrderBy { public: - explicit BoundOrderBy(OrderByType type, std::unique_ptr expr) - : type_(type), expr_(std::move(expr)) {} + BoundOrderBy(OrderByType type, OrderByNullType null_order, std::unique_ptr expr) + : type_(type), null_order_(null_order), expr_(std::move(expr)) {} - /** The order by type. */ + /** The order by type (ASC/DESC/DEFAULT). */ OrderByType type_; + /** The null ordering (NULLS FIRST/LAST/DEFAULT). */ + OrderByNullType null_order_; + /** The order by expression */ std::unique_ptr expr_; /** Render this statement as a string. */ - auto ToString() const -> std::string { return fmt::format("BoundOrderBy {{ type={}, expr={} }}", type_, expr_); } + auto ToString() const -> std::string { + return fmt::format("BoundOrderBy {{ type={}, nulls={}, expr={} }}", type_, null_order_, expr_); + } }; } // namespace bustub @@ -98,3 +113,26 @@ struct fmt::formatter : formatter { return formatter::format(name, ctx); } }; + +template <> +struct fmt::formatter : fmt::formatter { + template + auto format(bustub::OrderByNullType c, FormatContext &ctx) const { + std::string_view name; + switch (c) { + case bustub::OrderByNullType::DEFAULT: + name = "Default"; + break; + case bustub::OrderByNullType::NULLS_FIRST: + name = "NullsFirst"; + break; + case bustub::OrderByNullType::NULLS_LAST: + name = "NullsLast"; + break; + default: + name = "Unknown"; + break; + } + return fmt::formatter::format(name, ctx); + } +}; diff --git a/src/include/common/config.h b/src/include/common/config.h index 1b99bd248..2aa47b0b8 100644 --- a/src/include/common/config.h +++ b/src/include/common/config.h @@ -41,6 +41,7
@@ static constexpr int DEFAULT_DB_IO_SIZE = 16; static constexpr int LOG_BUFFER_SIZE = ((BUFFER_POOL_SIZE + 1) * BUSTUB_PAGE_SIZE); // size of a log buffer in byte static constexpr int BUCKET_SIZE = 50; // size of extendible hash bucket static constexpr int LRUK_REPLACER_K = 10; // backward k-distance for lru-k +static constexpr int BUSTUB_BATCH_SIZE = 20; // number of tuples processed in a batch using frame_id_t = int32_t; // frame id type using page_id_t = int32_t; // page id type diff --git a/src/include/execution/execution_engine.h b/src/include/execution/execution_engine.h index e3b81fe52..a37663065 100644 --- a/src/include/execution/execution_engine.h +++ b/src/include/execution/execution_engine.h @@ -94,11 +94,11 @@ class ExecutionEngine { */ static void PollExecutor(AbstractExecutor *executor, const AbstractPlanNodeRef &plan, std::vector *result_set) { - RID rid{}; - Tuple tuple{}; - while (executor->Next(&tuple, &rid)) { + std::vector rids{}; + std::vector tuples{}; + while (executor->Next(&tuples, &rids, BUSTUB_BATCH_SIZE)) { if (result_set != nullptr) { - result_set->push_back(tuple); + result_set->insert(result_set->end(), tuples.begin(), tuples.end()); } } } diff --git a/src/include/execution/executors/abstract_executor.h b/src/include/execution/executors/abstract_executor.h index dca7130ff..111fd8946 100644 --- a/src/include/execution/executors/abstract_executor.h +++ b/src/include/execution/executors/abstract_executor.h @@ -12,6 +12,8 @@ #pragma once +#include + #include "execution/executor_context.h" #include "storage/table/tuple.h" @@ -45,7 +47,8 @@ class AbstractExecutor { * @param[out] rid The next tuple RID produced by this executor * @return `true` if a tuple was produced, `false` if there are no more tuples */ - virtual auto Next(Tuple *tuple, RID *rid) -> bool = 0; + virtual auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool = 0; /** @return The schema of the tuples that this executor produces */ virtual 
auto GetOutputSchema() const -> const Schema & = 0; diff --git a/src/include/execution/executors/aggregation_executor.h b/src/include/execution/executors/aggregation_executor.h index ea4b8d9f6..bd6f7f159 100644 --- a/src/include/execution/executors/aggregation_executor.h +++ b/src/include/execution/executors/aggregation_executor.h @@ -154,7 +154,8 @@ class AggregationExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the aggregation */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; diff --git a/src/include/execution/executors/delete_executor.h b/src/include/execution/executors/delete_executor.h index 725cd1b82..381d5070b 100644 --- a/src/include/execution/executors/delete_executor.h +++ b/src/include/execution/executors/delete_executor.h @@ -34,7 +34,8 @@ class DeleteExecutor : public AbstractExecutor { void Init() override; - auto Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the delete */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; diff --git a/src/include/execution/executors/external_merge_sort_executor.h b/src/include/execution/executors/external_merge_sort_executor.h index 3e35a1f45..26fcd1dfe 100644 --- a/src/include/execution/executors/external_merge_sort_executor.h +++ b/src/include/execution/executors/external_merge_sort_executor.h @@ -21,28 +21,11 @@ #include "execution/execution_common.h" #include "execution/executors/abstract_executor.h" #include "execution/plans/sort_plan.h" +#include "storage/page/intermediate_result_page.h" #include "storage/table/tuple.h" namespace bustub { -/** - * Page to hold the intermediate data 
for external merge sort. - * - * Only fixed-length data will be supported in Spring 2025. - */ -class SortPage { - public: - /** - * TODO(P3): Define and implement the methods for reading data from and writing data to the sort - * page. Feel free to add other helper methods. - */ - private: - /** - * TODO(P3): Define the private members. You may want to have some necessary metadata for - * the sort page before the start of the actual data. - */ -}; - /** * A data structure that holds the sorted tuples as a run during external merge sort. * Tuples might be stored in multiple pages, and tuples are ordered both within one page @@ -131,7 +114,8 @@ class ExternalMergeSortExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the external merge sort */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } diff --git a/src/include/execution/executors/filter_executor.h b/src/include/execution/executors/filter_executor.h index 3359e86d7..a65e46b02 100644 --- a/src/include/execution/executors/filter_executor.h +++ b/src/include/execution/executors/filter_executor.h @@ -33,7 +33,8 @@ class FilterExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the filter plan */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } @@ -44,5 +45,12 @@ class FilterExecutor : public AbstractExecutor { /** The child executor from which tuples are obtained */ std::unique_ptr child_executor_; + + /** child tuple batch & child RID batch */ + std::vector child_tuples_{}; + std::vector child_rids_{}; + + /** child tuple batch offset */ + size_t 
child_offset_ = 0; }; } // namespace bustub diff --git a/src/include/execution/executors/hash_join_executor.h b/src/include/execution/executors/hash_join_executor.h index 17b5d0fe0..f3609ab5f 100644 --- a/src/include/execution/executors/hash_join_executor.h +++ b/src/include/execution/executors/hash_join_executor.h @@ -13,6 +13,7 @@ #pragma once #include +#include #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" @@ -31,7 +32,8 @@ class HashJoinExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the join */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; diff --git a/src/include/execution/executors/index_scan_executor.h b/src/include/execution/executors/index_scan_executor.h index 3609aeea4..ee6afe764 100644 --- a/src/include/execution/executors/index_scan_executor.h +++ b/src/include/execution/executors/index_scan_executor.h @@ -12,6 +12,8 @@ #pragma once +#include + #include "common/rid.h" #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" @@ -32,7 +34,8 @@ class IndexScanExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; private: /** The index scan plan node to be executed. 
*/ diff --git a/src/include/execution/executors/init_check_executor.h b/src/include/execution/executors/init_check_executor.h index 6f3d3abbe..f7a6a493d 100644 --- a/src/include/execution/executors/init_check_executor.h +++ b/src/include/execution/executors/init_check_executor.h @@ -14,6 +14,7 @@ #include #include +#include #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" @@ -30,7 +31,8 @@ class InitCheckExecutor : public AbstractExecutor { std::unique_ptr &&child_executor); void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the child executor */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; diff --git a/src/include/execution/executors/insert_executor.h b/src/include/execution/executors/insert_executor.h index 210048d1e..d243275e2 100644 --- a/src/include/execution/executors/insert_executor.h +++ b/src/include/execution/executors/insert_executor.h @@ -13,6 +13,7 @@ #pragma once #include +#include #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" @@ -32,7 +33,8 @@ class InsertExecutor : public AbstractExecutor { void Init() override; - auto Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the insert */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; diff --git a/src/include/execution/executors/limit_executor.h b/src/include/execution/executors/limit_executor.h index f6030b4fd..35620dc3c 100644 --- a/src/include/execution/executors/limit_executor.h +++ b/src/include/execution/executors/limit_executor.h @@ -14,6 +14,7 @@ #include #include +#include #include 
"execution/executors/abstract_executor.h" #include "execution/plans/limit_plan.h" @@ -30,7 +31,8 @@ class LimitExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the limit */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; diff --git a/src/include/execution/executors/mock_scan_executor.h b/src/include/execution/executors/mock_scan_executor.h index 98faa7c4a..f8ff8b119 100644 --- a/src/include/execution/executors/mock_scan_executor.h +++ b/src/include/execution/executors/mock_scan_executor.h @@ -34,7 +34,8 @@ class MockScanExecutor : public AbstractExecutor { MockScanExecutor(ExecutorContext *exec_ctx, const MockScanPlanNode *plan); void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the sequential scan */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } diff --git a/src/include/execution/executors/nested_index_join_executor.h b/src/include/execution/executors/nested_index_join_executor.h index 5454e4c1f..7b5073fed 100644 --- a/src/include/execution/executors/nested_index_join_executor.h +++ b/src/include/execution/executors/nested_index_join_executor.h @@ -13,6 +13,7 @@ #pragma once #include +#include #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" @@ -34,7 +35,8 @@ class NestedIndexJoinExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; private: /** The nested index join plan node. 
*/ diff --git a/src/include/execution/executors/nested_loop_join_executor.h b/src/include/execution/executors/nested_loop_join_executor.h index 6186535f1..ea69b5ae0 100644 --- a/src/include/execution/executors/nested_loop_join_executor.h +++ b/src/include/execution/executors/nested_loop_join_executor.h @@ -13,6 +13,7 @@ #pragma once #include +#include #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" @@ -32,7 +33,8 @@ class NestedLoopJoinExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the insert */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; diff --git a/src/include/execution/executors/projection_executor.h b/src/include/execution/executors/projection_executor.h index da2d64fa9..f6a8d58cc 100644 --- a/src/include/execution/executors/projection_executor.h +++ b/src/include/execution/executors/projection_executor.h @@ -32,7 +32,7 @@ class ProjectionExecutor : public AbstractExecutor { std::unique_ptr &&child_executor); void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) -> bool override; /** @return The output schema for the projection plan */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } @@ -43,5 +43,12 @@ class ProjectionExecutor : public AbstractExecutor { /** The child executor from which tuples are obtained */ std::unique_ptr child_executor_; + + /** child tuple batch & child RID batch */ + std::vector child_tuples_{}; + std::vector child_rids_{}; + + /** child tuple batch offset */ + size_t child_offset_ = 0; }; } // namespace bustub diff --git a/src/include/execution/executors/seq_scan_executor.h 
b/src/include/execution/executors/seq_scan_executor.h index 452c7e15b..2879ba562 100644 --- a/src/include/execution/executors/seq_scan_executor.h +++ b/src/include/execution/executors/seq_scan_executor.h @@ -12,6 +12,8 @@ #pragma once +#include + #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" #include "execution/plans/seq_scan_plan.h" @@ -28,7 +30,8 @@ class SeqScanExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the sequential scan */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } diff --git a/src/include/execution/executors/sort_executor.h b/src/include/execution/executors/sort_executor.h index e2ccdb0be..6430c9f33 100644 --- a/src/include/execution/executors/sort_executor.h +++ b/src/include/execution/executors/sort_executor.h @@ -13,6 +13,7 @@ #pragma once #include +#include #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" @@ -30,7 +31,8 @@ class SortExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the sort */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } diff --git a/src/include/execution/executors/topn_check_executor.h b/src/include/execution/executors/topn_check_executor.h index a07d57ae7..824787bc6 100644 --- a/src/include/execution/executors/topn_check_executor.h +++ b/src/include/execution/executors/topn_check_executor.h @@ -14,6 +14,7 @@ #include #include +#include #include "execution/executors/abstract_executor.h" #include "execution/executors/topn_executor.h" @@ -30,7 +31,7 @@ class 
TopNCheckExecutor : public AbstractExecutor { std::unique_ptr &&child_executor, TopNExecutor *topn_executor); void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) -> bool override; /** @return The output schema for the child executor */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); }; diff --git a/src/include/execution/executors/topn_executor.h b/src/include/execution/executors/topn_executor.h index 0ae7f1cd9..7e69ccb03 100644 --- a/src/include/execution/executors/topn_executor.h +++ b/src/include/execution/executors/topn_executor.h @@ -33,7 +33,8 @@ class TopNExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the TopN */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } diff --git a/src/include/execution/executors/topn_per_group_executor.h b/src/include/execution/executors/topn_per_group_executor.h index cda52669e..47dac36ef 100644 --- a/src/include/execution/executors/topn_per_group_executor.h +++ b/src/include/execution/executors/topn_per_group_executor.h @@ -33,7 +33,7 @@ class TopNPerGroupExecutor : public AbstractExecutor { std::unique_ptr &&child_executor); void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) -> bool override; auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } diff --git a/src/include/execution/executors/update_executor.h b/src/include/execution/executors/update_executor.h index f97afcbe4..eb91fe25d 100644 --- a/src/include/execution/executors/update_executor.h +++ b/src/include/execution/executors/update_executor.h @@ -13,6 
+13,7 @@ #pragma once #include +#include #include "execution/executor_context.h" #include "execution/executors/abstract_executor.h" @@ -34,7 +35,8 @@ class UpdateExecutor : public AbstractExecutor { void Init() override; - auto Next([[maybe_unused]] Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the update */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } diff --git a/src/include/execution/executors/values_executor.h b/src/include/execution/executors/values_executor.h index 6b81bcfb3..c33c1e0ad 100644 --- a/src/include/execution/executors/values_executor.h +++ b/src/include/execution/executors/values_executor.h @@ -30,7 +30,7 @@ class ValuesExecutor : public AbstractExecutor { ValuesExecutor(ExecutorContext *exec_ctx, const ValuesPlanNode *plan); void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) -> bool override; /** @return The output schema for the values */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } diff --git a/src/include/execution/executors/window_function_executor.h b/src/include/execution/executors/window_function_executor.h index da961d19c..691c3c7e8 100644 --- a/src/include/execution/executors/window_function_executor.h +++ b/src/include/execution/executors/window_function_executor.h @@ -67,7 +67,8 @@ class WindowFunctionExecutor : public AbstractExecutor { void Init() override; - auto Next(Tuple *tuple, RID *rid) -> bool override; + auto Next(std::vector *tuple_batch, std::vector *rid_batch, size_t batch_size) + -> bool override; /** @return The output schema for the window aggregation plan */ auto GetOutputSchema() const -> const Schema & override { return plan_->OutputSchema(); } diff --git 
a/src/include/execution/plans/aggregation_plan.h b/src/include/execution/plans/aggregation_plan.h index d143ad5ca..811fbbf5b 100644 --- a/src/include/execution/plans/aggregation_plan.h +++ b/src/include/execution/plans/aggregation_plan.h @@ -98,7 +98,7 @@ struct AggregateKey { std::vector group_bys_; /** - * Compares two aggregate keys for equality. + * Compares two aggregate keys for equality. TODO(p3): you may need to change this to handle NULLs. * @param other the other aggregate key to be compared with * @return `true` if both aggregate keys have equivalent group-by expressions, `false` otherwise */ diff --git a/src/include/storage/page/intermediate_result_page.h b/src/include/storage/page/intermediate_result_page.h new file mode 100644 index 000000000..4b219a2bf --- /dev/null +++ b/src/include/storage/page/intermediate_result_page.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include +#include + +#include "storage/table/tuple.h" + +namespace bustub { + +/** + * Page to hold the intermediate data for external merge sort and hash join. + * Supports variable-length tuples. + */ +class IntermediateResultPage { + public: + /** + * TODO(P3): Define and implement the methods for reading data from and writing data to the sort + * page. Feel free to add other helper methods. + */ + private: + /** + * TODO(P3): Define the private members. You may want to have some necessary metadata for + * the sort page before the start of the actual data. 
+ */ +}; + +} // namespace bustub diff --git a/src/optimizer/order_by_index_scan.cpp b/src/optimizer/order_by_index_scan.cpp index 7a8bf6c4b..7e2a17d0d 100644 --- a/src/optimizer/order_by_index_scan.cpp +++ b/src/optimizer/order_by_index_scan.cpp @@ -48,7 +48,7 @@ auto Optimizer::OptimizeOrderByAsIndexScan(const AbstractPlanNodeRef &plan) -> A const auto &order_bys = sort_plan.GetOrderBy(); std::vector order_by_column_ids; - for (const auto &[order_type, expr] : order_bys) { + for (const auto &[order_type, order_null, expr] : order_bys) { // Order type is asc or default if (order_type != OrderByType::ASC && order_type != OrderByType::DEFAULT) { return optimized_plan; diff --git a/src/planner/plan_select.cpp b/src/planner/plan_select.cpp index b77a8504d..4792e4eb8 100644 --- a/src/planner/plan_select.cpp +++ b/src/planner/plan_select.cpp @@ -133,7 +133,7 @@ auto Planner::PlanSelect(const SelectStatement &statement) -> AbstractPlanNodeRe for (const auto &order_by : statement.sort_) { auto [_, expr] = PlanExpression(*order_by->expr_, {plan}); auto abstract_expr = std::move(expr); - order_bys.emplace_back(order_by->type_, abstract_expr); + order_bys.emplace_back(order_by->type_, order_by->null_order_, abstract_expr); } plan = std::make_shared(std::make_shared(plan->OutputSchema()), plan, std::move(order_bys)); } diff --git a/src/planner/plan_window_function.cpp b/src/planner/plan_window_function.cpp index 8cb7d08bc..b62538ea1 100644 --- a/src/planner/plan_window_function.cpp +++ b/src/planner/plan_window_function.cpp @@ -52,10 +52,16 @@ void CheckOrderByCompatible(const std::vector> &order_by_ex throw Exception("order by clause of window functions are not compatible"); } for (uint32_t i = 0; i < order_by.size(); i++) { - if (order_by[i].first != first_order_by[i].first) { + const auto &[first_order_by_type, first_order_null, first_expr] = first_order_by[i]; + const auto &[order_by_type, order_null, expr] = order_by[i]; + + if (order_by_type != first_order_by_type) { + 
throw Exception("order by clause of window functions are not compatible"); + } + if (order_null != first_order_null) { throw Exception("order by clause of window functions are not compatible"); } - if (order_by[i].second->ToString() != first_order_by[i].second->ToString()) { + if (expr->ToString() != first_expr->ToString()) { throw Exception("order by clause of window functions are not compatible"); } } @@ -126,7 +132,7 @@ auto Planner::PlanSelectWindow(const SelectStatement &statement, AbstractPlanNod for (const auto &item : window_call.order_bys_) { auto [_, expr] = PlanExpression(*item->expr_, {child}); auto abstract_expr = std::move(expr); - order_by.emplace_back(item->type_, abstract_expr); + order_by.emplace_back(item->type_, item->null_order_, abstract_expr); } order_by_exprs.emplace_back(std::move(order_by)); diff --git a/test/sql/p3.00-primer.slt b/test/sql/p3.00-primer.slt index e01cd21f6..00c33ea16 100644 --- a/test/sql/p3.00-primer.slt +++ b/test/sql/p3.00-primer.slt @@ -1,11 +1,11 @@ query rowsort -select github_id, office_hour from __mock_table_tas_2025_spring; +select github_id, office_hour from __mock_table_tas_2025_fall; ---- -AlSchlo Friday -ChrisLaspias Wednesday -carpecodeum Monday -hyoungjook Tuesday -joesunil123 Friday -mrwhitezz Thursday -rmboyce Monday -yliang412 Tuesday +17zhangw Tuesday +quantumish Monday +songwdfu Thursday +notSaranshMalik Friday +shinyumh Tuesday +s-wangru Tuesday +rayhhome Friday +MrWhitezz Wednesday diff --git a/test/sql/p3.07-simple-agg.slt b/test/sql/p3.07-simple-agg.slt index 821989095..59ec80541 100644 --- a/test/sql/p3.07-simple-agg.slt +++ b/test/sql/p3.07-simple-agg.slt @@ -1,8 +1,8 @@ # 4 pts -# How many TAs are there in 2024 Spring? +# How many TAs are there in 2025 Fall? 
query -select count(*) from __mock_table_tas_2025_spring; +select count(*) from __mock_table_tas_2025_fall; ---- 8 diff --git a/test/sql/p3.08-group-agg-1.slt b/test/sql/p3.08-group-agg-1.slt index 2e9c5db59..681b6273e 100644 --- a/test/sql/p3.08-group-agg-1.slt +++ b/test/sql/p3.08-group-agg-1.slt @@ -4,11 +4,11 @@ # "rowsort" means that the order of result doesn't matter. query rowsort -select office_hour, count(*) from __mock_table_tas_2025_spring group by office_hour; +select office_hour, count(*) from __mock_table_tas_2025_fall group by office_hour; ---- -Tuesday 2 +Tuesday 3 Friday 2 -Monday 2 +Monday 1 Wednesday 1 Thursday 1 @@ -107,3 +107,27 @@ select v4+v5+count(*), sum(v1+v2), min(v3+v4), count(*) from t1 ---- 339 65400 6 100 340 75400 7 100 + +# testing group by on columns with null values +# if you see a seg fault here, it is likely because you are not currently supporting group by on columns with nulls + +query rowsort +select colH, count(*) from __mock_table_4 group by colH; +---- +💩 30 +💩💩💩 30 +💩💩 30 +varlen_null 10 + +query rowsort +select distinct count(*) from __mock_table_4 group by colG; +---- +20 + +query rowsort +select colH, min(colG) from __mock_table_4 group by colH; +---- +💩 1 +💩💩💩 1 +💩💩 1 +varlen_null integer_null \ No newline at end of file diff --git a/test/sql/p3.14-hash-join.slt b/test/sql/p3.14-hash-join.slt index f47d908ac..6a961f234 100644 --- a/test/sql/p3.14-hash-join.slt +++ b/test/sql/p3.14-hash-join.slt @@ -269,6 +269,18 @@ insert into temp_2 values (98, 351, 8), (99, 722, 9); +statement ok +create table big_l(a int, b int, c int, d int); + +statement ok +insert into big_l + select t1.colA, t1.colB, t1.colC, t1.colD + from temp_1 t1, temp_2 t2; + +query +ensure:hash_join +select count(*) from big_l b inner join temp_2 t2 on b.a = t2.colA; +---- +10000 statement ok create table temp_3(colA int, colB int); diff --git a/test/sql/p3.15-multi-way-hash-join.slt b/test/sql/p3.15-multi-way-hash-join.slt index 66b18fc0a..d53716d17 
100644 --- a/test/sql/p3.15-multi-way-hash-join.slt +++ b/test/sql/p3.15-multi-way-hash-join.slt @@ -1,4 +1,4 @@ -# 8 pts +# 7 pts statement ok create table t1(v1 int); diff --git a/test/sql/p3.16-sort-limit.slt b/test/sql/p3.16-sort-limit.slt index f20801a34..10dee1672 100644 --- a/test/sql/p3.16-sort-limit.slt +++ b/test/sql/p3.16-sort-limit.slt @@ -54,6 +54,44 @@ query select * from temp_1 order by col1; ---- +# testing sort on null / varchar + +query +select * from __mock_table_tas_2025_fall order by github_id limit 5; +---- +17zhangw Tuesday +MrWhitezz Wednesday +notSaranshMalik Friday +quantumish Monday +rayhhome Friday + +query +select * from __mock_table_4 order by colH limit 3; +---- +integer_null varlen_null +integer_null varlen_null +integer_null varlen_null + +query +select * from __mock_table_4 order by colH desc, colG asc limit 3; +---- +integer_null 💩💩💩 +integer_null 💩💩💩 +integer_null 💩💩💩 + +query +select * from __mock_table_4 order by colG desc nulls first, colH nulls last limit 3; +---- +integer_null 💩 +integer_null 💩 +integer_null 💩 + +query +select * from __mock_table_4 order by colG asc nulls last, colH limit 3; +---- +1 💩 +1 💩 +1 💩 statement ok insert into temp_1 values @@ -653,3 +691,233 @@ select * from 1723 3500 61 35 3500 1823 3600 50 36 3600 1923 3700 39 37 3700 + + +statement ok +create table temp_3(v1 int, v2 varchar(128), v3 int, v4 int, v5 int, v6 int, v7 int, v8 int); + +statement ok +insert into temp_3 values +(0, '', 10, 100, 200, 300, 400, 500), +(1, '🥰', 11, 100, 200, 300, 400, 500), +(2, '🥰🥰', 12, 100, 200, 300, 400, 500), +(3, '🥰🥰🥰', 13, 100, 200, 300, 400, 500), +(4, '🥰🥰🥰🥰', 14, 100, 200, 300, 400, 500), +(5, '🥰🥰🥰🥰🥰', 15, 100, 200, 300, 400, 500), +(6, '', 16, 100, 200, 300, 400, 500), +(7, '🥰', 17, 100, 200, 300, 400, 500), +(8, '🥰🥰', 18, 100, 200, 300, 400, 500), +(9, '🥰🥰🥰', 19, 100, 200, 300, 400, 500), +(10, '🥰🥰🥰🥰', 20, 100, 200, 300, 400, 500), +(11, '🥰🥰🥰🥰🥰', 21, 100, 200, 300, 400, 500), +(12, '', 22, 100, 200, 300, 400, 
500), +(13, '🥰', 23, 100, 200, 300, 400, 500), +(14, '🥰🥰', 24, 100, 200, 300, 400, 500), +(15, '🥰🥰🥰', 25, 100, 200, 300, 400, 500), +(16, '🥰🥰🥰🥰', 26, 100, 200, 300, 400, 500), +(17, '🥰🥰🥰🥰🥰', 27, 100, 200, 300, 400, 500), +(18, '', 28, 100, 200, 300, 400, 500), +(19, '🥰', 29, 100, 200, 300, 400, 500), +(20, '🥰🥰', 30, 100, 200, 300, 400, 500), +(21, '🥰🥰🥰', 31, 100, 200, 300, 400, 500), +(22, '🥰🥰🥰🥰', 32, 100, 200, 300, 400, 500), +(23, '🥰🥰🥰🥰🥰', 33, 100, 200, 300, 400, 500), +(24, '', 34, 100, 200, 300, 400, 500), +(25, '🥰', 35, 100, 200, 300, 400, 500), +(26, '🥰🥰', 36, 100, 200, 300, 400, 500), +(27, '🥰🥰🥰', 37, 100, 200, 300, 400, 500), +(28, '🥰🥰🥰🥰', 38, 100, 200, 300, 400, 500), +(29, '🥰🥰🥰🥰🥰', 39, 100, 200, 300, 400, 500), +(30, '', 40, 100, 200, 300, 400, 500), +(31, '🥰', 41, 100, 200, 300, 400, 500), +(32, '🥰🥰', 42, 100, 200, 300, 400, 500), +(33, '🥰🥰🥰', 43, 100, 200, 300, 400, 500), +(34, '🥰🥰🥰🥰', 44, 100, 200, 300, 400, 500), +(35, '🥰🥰🥰🥰🥰', 45, 100, 200, 300, 400, 500), +(36, '', 46, 100, 200, 300, 400, 500), +(37, '🥰', 47, 100, 200, 300, 400, 500), +(38, '🥰🥰', 48, 100, 200, 300, 400, 500), +(39, '🥰🥰🥰', 49, 100, 200, 300, 400, 500), +(40, '🥰🥰🥰🥰', 50, 100, 200, 300, 400, 500), +(41, '🥰🥰🥰🥰🥰', 51, 100, 200, 300, 400, 500), +(42, '', 52, 100, 200, 300, 400, 500), +(43, '🥰', 53, 100, 200, 300, 400, 500), +(44, '🥰🥰', 54, 100, 200, 300, 400, 500), +(45, '🥰🥰🥰', 55, 100, 200, 300, 400, 500), +(46, '🥰🥰🥰🥰', 56, 100, 200, 300, 400, 500), +(47, '🥰🥰🥰🥰🥰', 57, 100, 200, 300, 400, 500), +(48, '', 58, 100, 200, 300, 400, 500), +(49, '🥰', 59, 100, 200, 300, 400, 500), +(50, '🥰🥰', 60, 100, 200, 300, 400, 500), +(51, '🥰🥰🥰', 61, 100, 200, 300, 400, 500), +(52, '🥰🥰🥰🥰', 62, 100, 200, 300, 400, 500), +(53, '🥰🥰🥰🥰🥰', 63, 100, 200, 300, 400, 500), +(54, '', 64, 100, 200, 300, 400, 500), +(55, '🥰', 65, 100, 200, 300, 400, 500), +(56, '🥰🥰', 66, 100, 200, 300, 400, 500), +(57, '🥰🥰🥰', 67, 100, 200, 300, 400, 500), +(58, '🥰🥰🥰🥰', 68, 100, 200, 300, 400, 500), +(null, '🥰🥰🥰🥰🥰', 69, 100, 200, 300, 
400, 500), +(60, '', 70, 100, 200, 300, 400, 500), +(61, '🥰', 71, 100, 200, 300, 400, 500), +(62, '🥰🥰', 72, 100, 200, 300, 400, 500), +(63, '🥰🥰🥰', 73, 100, 200, 300, 400, 500), +(64, '🥰🥰🥰🥰', 74, 100, 200, 300, 400, 500), +(null, '🥰🥰🥰🥰🥰', 75, 100, 200, 300, 400, 500), +(66, '', 76, 100, 200, 300, 400, 500), +(67, '🥰', 77, 100, 200, 300, 400, 500), +(68, '🥰🥰', 78, 100, 200, 300, 400, 500), +(69, '🥰🥰🥰', 79, 100, 200, 300, 400, 500), +(70, '🥰🥰🥰🥰', 80, 100, 200, 300, 400, 500), +(71, '🥰🥰🥰🥰🥰', 81, 100, 200, 300, 400, 500), +(72, '', 82, 100, 200, 300, 400, 500), +(73, '🥰', 83, 100, 200, 300, 400, 500), +(74, '🥰🥰', 84, 100, 200, 300, 400, 500), +(75, '🥰🥰🥰', 85, 100, 200, 300, 400, 500), +(76, '🥰🥰🥰🥰', 86, 100, 200, 300, 400, 500), +(77, '🥰🥰🥰🥰🥰', 87, 100, 200, 300, 400, 500), +(78, '', 88, 100, 200, 300, 400, 500), +(79, '🥰', 89, 100, 200, 300, 400, 500), +(80, '🥰🥰', 90, 100, 200, 300, 400, 500), +(81, '🥰🥰🥰', 91, 100, 200, 300, 400, 500), +(82, '🥰🥰🥰🥰', 92, 100, 200, 300, 400, 500), +(83, '🥰🥰🥰🥰🥰', 93, 100, 200, 300, 400, 500), +(84, '', 94, 100, 200, 300, 400, 500), +(85, '🥰', 95, 100, 200, 300, 400, 500), +(86, '🥰🥰', 96, 100, 200, 300, 400, 500), +(87, '🥰🥰🥰', 97, 100, 200, 300, 400, 500), +(88, '🥰🥰🥰🥰', 98, 100, 200, 300, 400, 500), +(89, '🥰🥰🥰🥰🥰', 99, 100, 200, 300, 400, 500), +(90, '', 100, 100, 200, 300, 400, 500), +(91, '🥰', 101, 100, 200, 300, 400, 500), +(92, '🥰🥰', 102, 100, 200, 300, 400, 500), +(93, '🥰🥰🥰', 103, 100, 200, 300, 400, 500), +(94, '🥰🥰🥰🥰', 104, 100, 200, 300, 400, 500), +(95, '🥰🥰🥰🥰🥰', 105, 100, 200, 300, 400, 500), +(96, '', 106, 100, 200, 300, 400, 500), +(97, '🥰', 107, 100, 200, 300, 400, 500), +(98, '🥰🥰', 108, 100, 200, 300, 400, 500), +(99, '🥰🥰🥰', 109, 100, 200, 300, 400, 500), +(100, '🥰🥰🥰🥰', 110, 100, 200, 300, 400, 500), +(101, '🥰🥰🥰🥰🥰', 111, 100, 200, 300, 400, 500), +(102, '', 112, 100, 200, 300, 400, 500), +(103, '🥰', 113, 100, 200, 300, 400, 500), +(104, '🥰🥰', 114, 100, 200, 300, 400, 500), +(105, '🥰🥰🥰', 115, 100, 200, 300, 400, 500), +(106, 
'🥰🥰🥰🥰', 116, 100, 200, 300, 400, 500), +(107, '🥰🥰🥰🥰🥰', 117, 100, 200, 300, 400, 500), +(108, '', 118, 100, 200, 300, 400, 500), +(109, '🥰', 119, 100, 200, 300, 400, 500), +(110, '🥰🥰', 120, 100, 200, 300, 400, 500), +(111, '🥰🥰🥰', 121, 100, 200, 300, 400, 500), +(112, '🥰🥰🥰🥰', 122, 100, 200, 300, 400, 500), +(113, '🥰🥰🥰🥰🥰', 123, 100, 200, 300, 400, 500), +(114, '', 124, 100, 200, 300, 400, 500), +(115, '🥰', 125, 100, 200, 300, 400, 500), +(116, '🥰🥰', 126, 100, 200, 300, 400, 500), +(117, '🥰🥰🥰', 127, 100, 200, 300, 400, 500), +(118, '🥰🥰🥰🥰', 128, 100, 200, 300, 400, 500), +(119, '🥰🥰🥰🥰🥰', 129, 100, 200, 300, 400, 500), +(120, '', 130, 100, 200, 300, 400, 500), +(121, '🥰', 131, 100, 200, 300, 400, 500), +(122, '🥰🥰', 132, 100, 200, 300, 400, 500), +(123, '🥰🥰🥰', 133, 100, 200, 300, 400, 500), +(124, '🥰🥰🥰🥰', 134, 100, 200, 300, 400, 500), +(125, '🥰🥰🥰🥰🥰', 135, 100, 200, 300, 400, 500), +(126, '', 136, 100, 200, 300, 400, 500), +(127, '🥰', 137, 100, 200, 300, 400, 500), +(128, '🥰🥰', 138, 100, 200, 300, 400, 500), +(129, '🥰🥰🥰', 139, 100, 200, 300, 400, 500), +(130, '🥰🥰🥰🥰', 140, 100, 200, 300, 400, 500), +(131, '🥰🥰🥰🥰🥰', 141, 100, 200, 300, 400, 500), +(132, '', 142, 100, 200, 300, 400, 500), +(133, '🥰', 143, 100, 200, 300, 400, 500), +(134, '🥰🥰', 144, 100, 200, 300, 400, 500), +(135, '🥰🥰🥰', 145, 100, 200, 300, 400, 500), +(136, '🥰🥰🥰🥰', 146, 100, 200, 300, 400, 500), +(null, '🥰🥰🥰🥰🥰', 147, 100, 200, 300, 400, 500), +(138, '', 148, 100, 200, 300, 400, 500), +(139, '🥰', 149, 100, 200, 300, 400, 500), +(140, '🥰🥰', 150, 100, 200, 300, 400, 500), +(141, '🥰🥰🥰', 151, 100, 200, 300, 400, 500), +(142, '🥰🥰🥰🥰', 152, 100, 200, 300, 400, 500), +(143, '🥰🥰🥰🥰🥰', 153, 100, 200, 300, 400, 500), +(144, '', 154, 100, 200, 300, 400, 500), +(145, '🥰', 155, 100, 200, 300, 400, 500), +(146, '🥰🥰', 156, 100, 200, 300, 400, 500), +(147, '🥰🥰🥰', 157, 100, 200, 300, 400, 500), +(148, '🥰🥰🥰🥰', 158, 100, 200, 300, 400, 500), +(149, '🥰🥰🥰🥰🥰', 159, 100, 200, 300, 400, 500), +(150, '', 160, 100, 200, 300, 400, 500), 
+(151, '🥰', 161, 100, 200, 300, 400, 500), +(152, '🥰🥰', 162, 100, 200, 300, 400, 500), +(153, '🥰🥰🥰', 163, 100, 200, 300, 400, 500), +(154, '🥰🥰🥰🥰', 164, 100, 200, 300, 400, 500), +(155, '🥰🥰🥰🥰🥰', 165, 100, 200, 300, 400, 500), +(156, '', 166, 100, 200, 300, 400, 500), +(157, '🥰', 167, 100, 200, 300, 400, 500), +(158, '🥰🥰', 168, 100, 200, 300, 400, 500), +(159, '🥰🥰🥰', 169, 100, 200, 300, 400, 500), +(160, '🥰🥰🥰🥰', 170, 100, 200, 300, 400, 500), +(161, '🥰🥰🥰🥰🥰', 171, 100, 200, 300, 400, 500), +(162, '', 172, 100, 200, 300, 400, 500), +(163, '🥰', 173, 100, 200, 300, 400, 500), +(164, '🥰🥰', 174, 100, 200, 300, 400, 500), +(165, '🥰🥰🥰', 175, 100, 200, 300, 400, 500), +(166, '🥰🥰🥰🥰', 176, 100, 200, 300, 400, 500), +(167, '🥰🥰🥰🥰🥰', 177, 100, 200, 300, 400, 500), +(168, '', 178, 100, 200, 300, 400, 500), +(169, '🥰', 179, 100, 200, 300, 400, 500), +(170, '🥰🥰', 180, 100, 200, 300, 400, 500), +(171, '🥰🥰🥰', 181, 100, 200, 300, 400, 500), +(172, '🥰🥰🥰🥰', 182, 100, 200, 300, 400, 500), +(173, '🥰🥰🥰🥰🥰', 183, 100, 200, 300, 400, 500), +(174, '', 184, 100, 200, 300, 400, 500), +(175, '🥰', 185, 100, 200, 300, 400, 500), +(176, '🥰🥰', 186, 100, 200, 300, 400, 500), +(177, '🥰🥰🥰', 187, 100, 200, 300, 400, 500), +(178, '🥰🥰🥰🥰', 188, 100, 200, 300, 400, 500), +(179, '🥰🥰🥰🥰🥰', 189, 100, 200, 300, 400, 500), +(180, '', 190, 100, 200, 300, 400, 500), +(181, '🥰', 191, 100, 200, 300, 400, 500), +(182, '🥰🥰', 192, 100, 200, 300, 400, 500), +(183, '🥰🥰🥰', 193, 100, 200, 300, 400, 500), +(184, '🥰🥰🥰🥰', 194, 100, 200, 300, 400, 500), +(185, '🥰🥰🥰🥰🥰', 195, 100, 200, 300, 400, 500), +(186, '', 196, 100, 200, 300, 400, 500), +(187, '🥰', 197, 100, 200, 300, 400, 500), +(188, '🥰🥰', 198, 100, 200, 300, 400, 500), +(189, '🥰🥰🥰', 199, 100, 200, 300, 400, 500), +(190, '🥰🥰🥰🥰', 200, 100, 200, 300, 400, 500), +(191, '🥰🥰🥰🥰🥰', 201, 100, 200, 300, 400, 500), +(192, '', 202, 100, 200, 300, 400, 500), +(193, '🥰', 203, 100, 200, 300, 400, 500), +(194, '🥰🥰', 204, 100, 200, 300, 400, 500), +(195, '🥰🥰🥰', 205, 100, 200, 300, 400, 
500), +(196, '🥰🥰🥰🥰', 206, 100, 200, 300, 400, 500), +(197, '🥰🥰🥰🥰🥰', 207, 100, 200, 300, 400, 500), +(198, '', 208, 100, 200, 300, 400, 500), +(199, '🥰', 209, 100, 200, 300, 400, 500); + +query +select * from temp_3 order by v2 desc, v1 asc nulls first, v3 desc limit 10; +---- +integer_null 🥰🥰🥰🥰🥰 147 100 200 300 400 500 +integer_null 🥰🥰🥰🥰🥰 75 100 200 300 400 500 +integer_null 🥰🥰🥰🥰🥰 69 100 200 300 400 500 +5 🥰🥰🥰🥰🥰 15 100 200 300 400 500 +11 🥰🥰🥰🥰🥰 21 100 200 300 400 500 +17 🥰🥰🥰🥰🥰 27 100 200 300 400 500 +23 🥰🥰🥰🥰🥰 33 100 200 300 400 500 +29 🥰🥰🥰🥰🥰 39 100 200 300 400 500 +35 🥰🥰🥰🥰🥰 45 100 200 300 400 500 +41 🥰🥰🥰🥰🥰 51 100 200 300 400 500 + +query +select v1, v2 from (select v1, v2, v5, v6, v5 + v6, v3, v4, v3 - v4 from temp_3 order by v1 desc limit 25) order by v2, v1 limit 5; +---- +180 +186 +192 +198 +175 🥰 \ No newline at end of file diff --git a/test/sql/p3.17-topn.slt b/test/sql/p3.17-topn.slt index 58d1bb5b3..73e2218ca 100644 --- a/test/sql/p3.17-topn.slt +++ b/test/sql/p3.17-topn.slt @@ -1,4 +1,5 @@ -# 7 pts +# 0 pts +# Fall 2025 - TopN Not Implemented query +ensure:topn select * from test_simple_seq_2 order by col1 desc limit 5; diff --git a/test/sql/p3.18-integration-1.slt b/test/sql/p3.18-integration-1.slt index db4fa7f64..38857d63c 100644 --- a/test/sql/p3.18-integration-1.slt +++ b/test/sql/p3.18-integration-1.slt @@ -1,4 +1,4 @@ -# 8 pts +# 7 pts # This is something done by some TAs in some start-ups before. Once they got a wrong result # in a query, they split the query into parts so as to find which one goes wrong... # diff --git a/test/sql/p3.19-integration-2.slt b/test/sql/p3.19-integration-2.slt index 0388e5a67..5c6dbc537 100644 --- a/test/sql/p3.19-integration-2.slt +++ b/test/sql/p3.19-integration-2.slt @@ -1,4 +1,4 @@ -# 10 pts +# 7 pts # Run in RELEASE MODE, set at least 60s timeout. # This test is like how a data engineer works towards their goal. They query some data,