From 2fd589ee180d506ad5e7a7250ffb6a9c4fccf6ec Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Thu, 18 Feb 2021 09:50:25 +0800 Subject: [PATCH 1/3] [NSE-108] Add end-to-end test suite against TPC-DS --- core/pom.xml | 10 +- .../intel/oap/{tpch => tpc}/MallocUtils.java | 6 +- .../resources/tpcds-queries-double/q1.sql | 19 + .../resources/tpcds-queries-double/q10.sql | 57 ++ .../resources/tpcds-queries-double/q11.sql | 68 ++ .../resources/tpcds-queries-double/q12.sql | 22 + .../resources/tpcds-queries-double/q13.sql | 49 ++ .../resources/tpcds-queries-double/q14a.sql | 120 ++++ .../resources/tpcds-queries-double/q14b.sql | 95 +++ .../resources/tpcds-queries-double/q15.sql | 15 + .../resources/tpcds-queries-double/q16.sql | 23 + .../resources/tpcds-queries-double/q17.sql | 33 + .../resources/tpcds-queries-double/q18.sql | 28 + .../resources/tpcds-queries-double/q19.sql | 19 + .../resources/tpcds-queries-double/q2.sql | 81 +++ .../resources/tpcds-queries-double/q20.sql | 18 + .../resources/tpcds-queries-double/q21.sql | 25 + .../resources/tpcds-queries-double/q22.sql | 14 + .../resources/tpcds-queries-double/q23a.sql | 53 ++ .../resources/tpcds-queries-double/q23b.sql | 68 ++ .../resources/tpcds-queries-double/q24a.sql | 34 + .../resources/tpcds-queries-double/q24b.sql | 34 + .../resources/tpcds-queries-double/q25.sql | 33 + .../resources/tpcds-queries-double/q26.sql | 19 + .../resources/tpcds-queries-double/q27.sql | 21 + .../resources/tpcds-queries-double/q28.sql | 56 ++ .../resources/tpcds-queries-double/q29.sql | 32 + .../resources/tpcds-queries-double/q3.sql | 13 + .../resources/tpcds-queries-double/q30.sql | 35 + .../resources/tpcds-queries-double/q31.sql | 60 ++ .../resources/tpcds-queries-double/q32.sql | 15 + .../resources/tpcds-queries-double/q33.sql | 65 ++ .../resources/tpcds-queries-double/q34.sql | 32 + .../resources/tpcds-queries-double/q35.sql | 46 ++ .../resources/tpcds-queries-double/q36.sql | 26 + .../resources/tpcds-queries-double/q37.sql | 15 + 
.../resources/tpcds-queries-double/q38.sql | 30 + .../resources/tpcds-queries-double/q39a.sql | 47 ++ .../resources/tpcds-queries-double/q39b.sql | 48 ++ .../resources/tpcds-queries-double/q4.sql | 120 ++++ .../resources/tpcds-queries-double/q40.sql | 25 + .../resources/tpcds-queries-double/q41.sql | 49 ++ .../resources/tpcds-queries-double/q42.sql | 18 + .../resources/tpcds-queries-double/q43.sql | 33 + .../resources/tpcds-queries-double/q44.sql | 46 ++ .../resources/tpcds-queries-double/q45.sql | 21 + .../resources/tpcds-queries-double/q46.sql | 32 + .../resources/tpcds-queries-double/q47.sql | 63 ++ .../resources/tpcds-queries-double/q48.sql | 63 ++ .../resources/tpcds-queries-double/q49.sql | 126 ++++ .../resources/tpcds-queries-double/q5.sql | 131 ++++ .../resources/tpcds-queries-double/q50.sql | 47 ++ .../resources/tpcds-queries-double/q51.sql | 55 ++ .../resources/tpcds-queries-double/q52.sql | 14 + .../resources/tpcds-queries-double/q53.sql | 30 + .../resources/tpcds-queries-double/q54.sql | 61 ++ .../resources/tpcds-queries-double/q55.sql | 13 + .../resources/tpcds-queries-double/q56.sql | 65 ++ .../resources/tpcds-queries-double/q57.sql | 56 ++ .../resources/tpcds-queries-double/q58.sql | 59 ++ .../resources/tpcds-queries-double/q59.sql | 75 ++ .../resources/tpcds-queries-double/q6.sql | 21 + .../resources/tpcds-queries-double/q60.sql | 62 ++ .../resources/tpcds-queries-double/q61.sql | 33 + .../resources/tpcds-queries-double/q62.sql | 35 + .../resources/tpcds-queries-double/q63.sql | 31 + .../resources/tpcds-queries-double/q64.sql | 92 +++ .../resources/tpcds-queries-double/q65.sql | 33 + .../resources/tpcds-queries-double/q66.sql | 240 +++++++ .../resources/tpcds-queries-double/q67.sql | 38 + .../resources/tpcds-queries-double/q68.sql | 34 + .../resources/tpcds-queries-double/q69.sql | 38 + .../resources/tpcds-queries-double/q7.sql | 19 + .../resources/tpcds-queries-double/q70.sql | 38 + .../resources/tpcds-queries-double/q71.sql | 44 ++ 
.../resources/tpcds-queries-double/q72.sql | 33 + .../resources/tpcds-queries-double/q73.sql | 30 + .../resources/tpcds-queries-double/q74.sql | 58 ++ .../resources/tpcds-queries-double/q75.sql | 76 ++ .../resources/tpcds-queries-double/q76.sql | 47 ++ .../resources/tpcds-queries-double/q77.sql | 100 +++ .../resources/tpcds-queries-double/q78.sql | 64 ++ .../resources/tpcds-queries-double/q79.sql | 27 + .../resources/tpcds-queries-double/q8.sql | 87 +++ .../resources/tpcds-queries-double/q80.sql | 94 +++ .../resources/tpcds-queries-double/q81.sql | 38 + .../resources/tpcds-queries-double/q82.sql | 15 + .../resources/tpcds-queries-double/q83.sql | 56 ++ .../resources/tpcds-queries-double/q84.sql | 19 + .../resources/tpcds-queries-double/q85.sql | 82 +++ .../resources/tpcds-queries-double/q86.sql | 24 + .../resources/tpcds-queries-double/q87.sql | 28 + .../resources/tpcds-queries-double/q88.sql | 122 ++++ .../resources/tpcds-queries-double/q89.sql | 30 + .../resources/tpcds-queries-double/q9.sql | 48 ++ .../resources/tpcds-queries-double/q90.sql | 19 + .../resources/tpcds-queries-double/q91.sql | 23 + .../resources/tpcds-queries-double/q92.sql | 16 + .../resources/tpcds-queries-double/q93.sql | 19 + .../resources/tpcds-queries-double/q94.sql | 23 + .../resources/tpcds-queries-double/q95.sql | 29 + .../resources/tpcds-queries-double/q96.sql | 11 + .../resources/tpcds-queries-double/q97.sql | 30 + .../resources/tpcds-queries-double/q98.sql | 21 + .../resources/tpcds-queries-double/q99.sql | 34 + .../q1.sql | 0 .../q10.sql | 0 .../q11.sql | 0 .../q12.sql | 0 .../q13.sql | 0 .../q14.sql | 0 .../q15.sql | 0 .../q16.sql | 0 .../q17.sql | 0 .../q18.sql | 0 .../q19.sql | 0 .../q2.sql | 0 .../q20.sql | 0 .../q21.sql | 0 .../q22.sql | 0 .../q3.sql | 0 .../q4.sql | 0 .../q5.sql | 0 .../q6.sql | 0 .../q7.sql | 0 .../q8.sql | 0 .../q9.sql | 0 .../com/intel/oap/tpc/ds/TPCDSSuite.scala | 91 +++ .../com/intel/oap/tpc/ds/TPCDSTableGen.scala | 676 ++++++++++++++++++ .../{tpch => 
tpc/h}/GitHubActionEntries.scala | 7 +- .../intel/oap/{tpch => tpc/h}/TPCHSuite.scala | 352 +-------- .../com/intel/oap/tpc/h/TPCHTableGen.scala | 320 +++++++++ .../com/intel/oap/tpc/util/TPCRunner.scala | 66 ++ cpp/src/jni/jni_wrapper.cc | 4 +- 134 files changed, 5917 insertions(+), 346 deletions(-) rename core/src/test/java/com/intel/oap/{tpch => tpc}/MallocUtils.java (91%) create mode 100755 core/src/test/resources/tpcds-queries-double/q1.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q10.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q11.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q12.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q13.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q14a.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q14b.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q15.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q16.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q17.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q18.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q19.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q2.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q20.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q21.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q22.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q23a.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q23b.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q24a.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q24b.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q25.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q26.sql create mode 100755 
core/src/test/resources/tpcds-queries-double/q27.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q28.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q29.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q3.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q30.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q31.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q32.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q33.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q34.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q35.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q36.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q37.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q38.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q39a.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q39b.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q4.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q40.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q41.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q42.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q43.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q44.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q45.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q46.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q47.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q48.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q49.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q5.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q50.sql 
create mode 100755 core/src/test/resources/tpcds-queries-double/q51.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q52.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q53.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q54.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q55.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q56.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q57.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q58.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q59.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q6.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q60.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q61.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q62.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q63.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q64.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q65.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q66.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q67.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q68.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q69.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q7.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q70.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q71.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q72.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q73.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q74.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q75.sql create mode 100755 
core/src/test/resources/tpcds-queries-double/q76.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q77.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q78.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q79.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q8.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q80.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q81.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q82.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q83.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q84.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q85.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q86.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q87.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q88.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q89.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q9.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q90.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q91.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q92.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q93.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q94.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q95.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q96.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q97.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q98.sql create mode 100755 core/src/test/resources/tpcds-queries-double/q99.sql rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q1.sql (100%) rename core/src/test/resources/{tpch-queries => 
tpch-queries-double}/q10.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q11.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q12.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q13.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q14.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q15.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q16.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q17.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q18.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q19.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q2.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q20.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q21.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q22.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q3.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q4.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q5.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q6.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q7.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q8.sql (100%) rename core/src/test/resources/{tpch-queries => tpch-queries-double}/q9.sql (100%) create mode 100644 core/src/test/scala/com/intel/oap/tpc/ds/TPCDSSuite.scala create mode 100644 core/src/test/scala/com/intel/oap/tpc/ds/TPCDSTableGen.scala rename core/src/test/scala/com/intel/oap/{tpch => tpc/h}/GitHubActionEntries.scala (92%) rename 
core/src/test/scala/com/intel/oap/{tpch => tpc/h}/TPCHSuite.scala (51%) create mode 100644 core/src/test/scala/com/intel/oap/tpc/h/TPCHTableGen.scala create mode 100644 core/src/test/scala/com/intel/oap/tpc/util/TPCRunner.scala diff --git a/core/pom.xml b/core/pom.xml index 665bb53a8..eb63c5db4 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -226,9 +226,15 @@ test - io.prestosql.tpch + io.trino.tpch tpch - 1.0 + 1.1 + test + + + io.trino.tpcds + tpcds + 1.4 test diff --git a/core/src/test/java/com/intel/oap/tpch/MallocUtils.java b/core/src/test/java/com/intel/oap/tpc/MallocUtils.java similarity index 91% rename from core/src/test/java/com/intel/oap/tpch/MallocUtils.java rename to core/src/test/java/com/intel/oap/tpc/MallocUtils.java index f1062d76d..1d55378a2 100644 --- a/core/src/test/java/com/intel/oap/tpch/MallocUtils.java +++ b/core/src/test/java/com/intel/oap/tpc/MallocUtils.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package com.intel.oap.tpch; +package com.intel.oap.tpc; import com.intel.oap.vectorized.JniUtils; @@ -35,10 +35,10 @@ public class MallocUtils { * Visible for testing: Try turning back allocated native memory to OS. This might have no effect * when using Jemalloc. */ - static native void mallocTrim(); + public static native void mallocTrim(); /** * Visible for testing: Print malloc statistics. 
*/ - static native void mallocStats(); + public static native void mallocStats(); } diff --git a/core/src/test/resources/tpcds-queries-double/q1.sql b/core/src/test/resources/tpcds-queries-double/q1.sql new file mode 100755 index 000000000..4d20faad8 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q1.sql @@ -0,0 +1,19 @@ +WITH customer_total_return AS +( SELECT + sr_customer_sk AS ctr_customer_sk, + sr_store_sk AS ctr_store_sk, + sum(sr_return_amt) AS ctr_total_return + FROM store_returns, date_dim + WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000 + GROUP BY sr_customer_sk, sr_store_sk) +SELECT c_customer_id +FROM customer_total_return ctr1, store, customer +WHERE ctr1.ctr_total_return > + (SELECT avg(ctr_total_return) * 1.2 + FROM customer_total_return ctr2 + WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) + AND s_store_sk = ctr1.ctr_store_sk + AND s_state = 'TN' + AND ctr1.ctr_customer_sk = c_customer_sk +ORDER BY c_customer_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q10.sql b/core/src/test/resources/tpcds-queries-double/q10.sql new file mode 100755 index 000000000..5500e1aea --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q10.sql @@ -0,0 +1,57 @@ +SELECT + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 +FROM + customer c, customer_address ca, customer_demographics +WHERE + c.c_current_addr_sk = ca.ca_address_sk AND + ca_county IN ('Rush County', 'Toole County', 'Jefferson County', + 'Dona Ana County', 'La Porte County') AND + cd_demo_sk = c.c_current_cdemo_sk AND + exists(SELECT * + FROM store_sales, date_dim + WHERE c.c_customer_sk = ss_customer_sk AND + ss_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_moy BETWEEN 1 AND 1 + 3) AND + (exists(SELECT * + FROM web_sales, 
date_dim + WHERE c.c_customer_sk = ws_bill_customer_sk AND + ws_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_moy BETWEEN 1 AND 1 + 3) OR + exists(SELECT * + FROM catalog_sales, date_dim + WHERE c.c_customer_sk = cs_ship_customer_sk AND + cs_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_moy BETWEEN 1 AND 1 + 3)) +GROUP BY cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +ORDER BY cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q11.sql b/core/src/test/resources/tpcds-queries-double/q11.sql new file mode 100755 index 000000000..3618fb14f --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q11.sql @@ -0,0 +1,68 @@ +WITH year_total AS ( + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum(ss_ext_list_price - ss_ext_discount_amt) year_total, + 's' sale_type + FROM customer, store_sales, date_dim + WHERE c_customer_sk = ss_customer_sk + AND ss_sold_date_sk = d_date_sk + GROUP BY c_customer_id + , c_first_name + , c_last_name + , d_year + , c_preferred_cust_flag + , c_birth_country + , c_login + , c_email_address + , d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum(ws_ext_list_price - ws_ext_discount_amt) year_total, + 'w' sale_type + FROM customer, 
web_sales, date_dim + WHERE c_customer_sk = ws_bill_customer_sk + AND ws_sold_date_sk = d_date_sk + GROUP BY + c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, + c_login, c_email_address, d_year) +SELECT t_s_secyear.customer_preferred_cust_flag +FROM year_total t_s_firstyear + , year_total t_s_secyear + , year_total t_w_firstyear + , year_total t_w_secyear +WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_secyear.customer_id + AND t_s_firstyear.customer_id = t_w_firstyear.customer_id + AND t_s_firstyear.sale_type = 's' + AND t_w_firstyear.sale_type = 'w' + AND t_s_secyear.sale_type = 's' + AND t_w_secyear.sale_type = 'w' + AND t_s_firstyear.dyear = 2001 + AND t_s_secyear.dyear = 2001 + 1 + AND t_w_firstyear.dyear = 2001 + AND t_w_secyear.dyear = 2001 + 1 + AND t_s_firstyear.year_total > 0 + AND t_w_firstyear.year_total > 0 + AND CASE WHEN t_w_firstyear.year_total > 0 + THEN t_w_secyear.year_total / t_w_firstyear.year_total + ELSE NULL END + > CASE WHEN t_s_firstyear.year_total > 0 + THEN t_s_secyear.year_total / t_s_firstyear.year_total + ELSE NULL END +ORDER BY t_s_secyear.customer_preferred_cust_flag +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q12.sql b/core/src/test/resources/tpcds-queries-double/q12.sql new file mode 100755 index 000000000..0382737f5 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q12.sql @@ -0,0 +1,22 @@ +SELECT + i_item_desc, + i_category, + i_class, + i_current_price, + sum(ws_ext_sales_price) AS itemrevenue, + sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) + OVER + (PARTITION BY i_class) AS revenueratio +FROM + web_sales, item, date_dim +WHERE + ws_item_sk = i_item_sk + AND i_category IN ('Sports', 'Books', 'Home') + AND ws_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('1999-02-22' AS DATE) + AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) +GROUP BY + i_item_id, i_item_desc, i_category, i_class, 
i_current_price +ORDER BY + i_category, i_class, i_item_id, i_item_desc, revenueratio +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q13.sql b/core/src/test/resources/tpcds-queries-double/q13.sql new file mode 100755 index 000000000..32dc9e260 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q13.sql @@ -0,0 +1,49 @@ +SELECT + avg(ss_quantity), + avg(ss_ext_sales_price), + avg(ss_ext_wholesale_cost), + sum(ss_ext_wholesale_cost) +FROM store_sales + , store + , customer_demographics + , household_demographics + , customer_address + , date_dim +WHERE s_store_sk = ss_store_sk + AND ss_sold_date_sk = d_date_sk AND d_year = 2001 + AND ((ss_hdemo_sk = hd_demo_sk + AND cd_demo_sk = ss_cdemo_sk + AND cd_marital_status = 'M' + AND cd_education_status = 'Advanced Degree' + AND ss_sales_price BETWEEN 100.00 AND 150.00 + AND hd_dep_count = 3 +) OR + (ss_hdemo_sk = hd_demo_sk + AND cd_demo_sk = ss_cdemo_sk + AND cd_marital_status = 'S' + AND cd_education_status = 'College' + AND ss_sales_price BETWEEN 50.00 AND 100.00 + AND hd_dep_count = 1 + ) OR + (ss_hdemo_sk = hd_demo_sk + AND cd_demo_sk = ss_cdemo_sk + AND cd_marital_status = 'W' + AND cd_education_status = '2 yr Degree' + AND ss_sales_price BETWEEN 150.00 AND 200.00 + AND hd_dep_count = 1 + )) + AND ((ss_addr_sk = ca_address_sk + AND ca_country = 'United States' + AND ca_state IN ('TX', 'OH', 'TX') + AND ss_net_profit BETWEEN 100 AND 200 +) OR + (ss_addr_sk = ca_address_sk + AND ca_country = 'United States' + AND ca_state IN ('OR', 'NM', 'KY') + AND ss_net_profit BETWEEN 150 AND 300 + ) OR + (ss_addr_sk = ca_address_sk + AND ca_country = 'United States' + AND ca_state IN ('VA', 'TX', 'MS') + AND ss_net_profit BETWEEN 50 AND 250 + )) diff --git a/core/src/test/resources/tpcds-queries-double/q14a.sql b/core/src/test/resources/tpcds-queries-double/q14a.sql new file mode 100755 index 000000000..954ddd41b --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q14a.sql @@ -0,0 
+1,120 @@ +WITH cross_items AS +(SELECT i_item_sk ss_item_sk + FROM item, + (SELECT + iss.i_brand_id brand_id, + iss.i_class_id class_id, + iss.i_category_id category_id + FROM store_sales, item iss, date_dim d1 + WHERE ss_item_sk = iss.i_item_sk + AND ss_sold_date_sk = d1.d_date_sk + AND d1.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + ics.i_brand_id, + ics.i_class_id, + ics.i_category_id + FROM catalog_sales, item ics, date_dim d2 + WHERE cs_item_sk = ics.i_item_sk + AND cs_sold_date_sk = d2.d_date_sk + AND d2.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + iws.i_brand_id, + iws.i_class_id, + iws.i_category_id + FROM web_sales, item iws, date_dim d3 + WHERE ws_item_sk = iws.i_item_sk + AND ws_sold_date_sk = d3.d_date_sk + AND d3.d_year BETWEEN 1999 AND 1999 + 2) x + WHERE i_brand_id = brand_id + AND i_class_id = class_id + AND i_category_id = category_id +), + avg_sales AS + (SELECT avg(quantity * list_price) average_sales + FROM ( + SELECT + ss_quantity quantity, + ss_list_price list_price + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk + AND d_year BETWEEN 1999 AND 2001 + UNION ALL + SELECT + cs_quantity quantity, + cs_list_price list_price + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk + AND d_year BETWEEN 1999 AND 1999 + 2 + UNION ALL + SELECT + ws_quantity quantity, + ws_list_price list_price + FROM web_sales, date_dim + WHERE ws_sold_date_sk = d_date_sk + AND d_year BETWEEN 1999 AND 1999 + 2) x) +SELECT + channel, + i_brand_id, + i_class_id, + i_category_id, + sum(sales), + sum(number_sales) +FROM ( + SELECT + 'store' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ss_quantity * ss_list_price) sales, + count(*) number_sales + FROM store_sales, item, date_dim + WHERE ss_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 1999 + 2 + AND d_moy = 11 + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING 
sum(ss_quantity * ss_list_price) > (SELECT average_sales + FROM avg_sales) + UNION ALL + SELECT + 'catalog' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(cs_quantity * cs_list_price) sales, + count(*) number_sales + FROM catalog_sales, item, date_dim + WHERE cs_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 1999 + 2 + AND d_moy = 11 + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(cs_quantity * cs_list_price) > (SELECT average_sales FROM avg_sales) + UNION ALL + SELECT + 'web' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ws_quantity * ws_list_price) sales, + count(*) number_sales + FROM web_sales, item, date_dim + WHERE ws_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 1999 + 2 + AND d_moy = 11 + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ws_quantity * ws_list_price) > (SELECT average_sales + FROM avg_sales) + ) y +GROUP BY ROLLUP (channel, i_brand_id, i_class_id, i_category_id) +ORDER BY channel, i_brand_id, i_class_id, i_category_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q14b.sql b/core/src/test/resources/tpcds-queries-double/q14b.sql new file mode 100755 index 000000000..929a8484b --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q14b.sql @@ -0,0 +1,95 @@ +WITH cross_items AS +(SELECT i_item_sk ss_item_sk + FROM item, + (SELECT + iss.i_brand_id brand_id, + iss.i_class_id class_id, + iss.i_category_id category_id + FROM store_sales, item iss, date_dim d1 + WHERE ss_item_sk = iss.i_item_sk + AND ss_sold_date_sk = d1.d_date_sk + AND d1.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + ics.i_brand_id, + ics.i_class_id, + ics.i_category_id + FROM catalog_sales, item ics, date_dim d2 + WHERE cs_item_sk = ics.i_item_sk + AND cs_sold_date_sk = d2.d_date_sk + AND d2.d_year BETWEEN 1999 AND 1999 + 2 + 
INTERSECT + SELECT + iws.i_brand_id, + iws.i_class_id, + iws.i_category_id + FROM web_sales, item iws, date_dim d3 + WHERE ws_item_sk = iws.i_item_sk + AND ws_sold_date_sk = d3.d_date_sk + AND d3.d_year BETWEEN 1999 AND 1999 + 2) x + WHERE i_brand_id = brand_id + AND i_class_id = class_id + AND i_category_id = category_id +), + avg_sales AS + (SELECT avg(quantity * list_price) average_sales + FROM (SELECT + ss_quantity quantity, + ss_list_price list_price + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 + UNION ALL + SELECT + cs_quantity quantity, + cs_list_price list_price + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 + UNION ALL + SELECT + ws_quantity quantity, + ws_list_price list_price + FROM web_sales, date_dim + WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x) +SELECT * +FROM + (SELECT + 'store' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ss_quantity * ss_list_price) sales, + count(*) number_sales + FROM store_sales, item, date_dim + WHERE ss_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_year = 1999 + 1 AND d_moy = 12 AND d_dom = 11) + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales + FROM avg_sales)) this_year, + (SELECT + 'store' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ss_quantity * ss_list_price) sales, + count(*) number_sales + FROM store_sales, item, date_dim + WHERE ss_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_year = 1999 AND d_moy = 12 AND d_dom = 11) + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ss_quantity * ss_list_price) > (SELECT 
average_sales + FROM avg_sales)) last_year +WHERE this_year.i_brand_id = last_year.i_brand_id + AND this_year.i_class_id = last_year.i_class_id + AND this_year.i_category_id = last_year.i_category_id +ORDER BY this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q15.sql b/core/src/test/resources/tpcds-queries-double/q15.sql new file mode 100755 index 000000000..b8182e23b --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q15.sql @@ -0,0 +1,15 @@ +SELECT + ca_zip, + sum(cs_sales_price) +FROM catalog_sales, customer, customer_address, date_dim +WHERE cs_bill_customer_sk = c_customer_sk + AND c_current_addr_sk = ca_address_sk + AND (substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475', + '85392', '85460', '80348', '81792') + OR ca_state IN ('CA', 'WA', 'GA') + OR cs_sales_price > 500) + AND cs_sold_date_sk = d_date_sk + AND d_qoy = 2 AND d_year = 2001 +GROUP BY ca_zip +ORDER BY ca_zip +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q16.sql b/core/src/test/resources/tpcds-queries-double/q16.sql new file mode 100755 index 000000000..732ad0d84 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q16.sql @@ -0,0 +1,23 @@ +SELECT + count(DISTINCT cs_order_number) AS `order count `, + sum(cs_ext_ship_cost) AS `total shipping cost `, + sum(cs_net_profit) AS `total net profit ` +FROM + catalog_sales cs1, date_dim, customer_address, call_center +WHERE + d_date BETWEEN '2002-02-01' AND (CAST('2002-02-01' AS DATE) + INTERVAL 60 days) + AND cs1.cs_ship_date_sk = d_date_sk + AND cs1.cs_ship_addr_sk = ca_address_sk + AND ca_state = 'GA' + AND cs1.cs_call_center_sk = cc_call_center_sk + AND cc_county IN + ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') + AND EXISTS(SELECT * + FROM catalog_sales cs2 + WHERE cs1.cs_order_number = cs2.cs_order_number + AND 
cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) + AND NOT EXISTS(SELECT * + FROM catalog_returns cr1 + WHERE cs1.cs_order_number = cr1.cr_order_number) +ORDER BY count(DISTINCT cs_order_number) +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q17.sql b/core/src/test/resources/tpcds-queries-double/q17.sql new file mode 100755 index 000000000..4d647f795 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q17.sql @@ -0,0 +1,33 @@ +SELECT /* q17: per item and store state, the count, mean, sample stddev and coefficient of variation (stddev / avg) of store-sales, store-returns and catalog-sales quantities */ + i_item_id, + i_item_desc, + s_state, + count(ss_quantity) AS store_sales_quantitycount, + avg(ss_quantity) AS store_sales_quantityave, + stddev_samp(ss_quantity) AS store_sales_quantitystdev, + stddev_samp(ss_quantity) / avg(ss_quantity) AS store_sales_quantitycov, + count(sr_return_quantity) AS store_returns_quantitycount, + avg(sr_return_quantity) AS store_returns_quantityave, + stddev_samp(sr_return_quantity) AS store_returns_quantitystdev, + stddev_samp(sr_return_quantity) / avg(sr_return_quantity) AS store_returns_quantitycov, + count(cs_quantity) AS catalog_sales_quantitycount, + avg(cs_quantity) AS catalog_sales_quantityave, + stddev_samp(cs_quantity) AS catalog_sales_quantitystdev, + stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitycov +FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item +WHERE d1.d_quarter_name = '2001Q1' + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND ss_customer_sk = sr_customer_sk + AND ss_item_sk = sr_item_sk + AND ss_ticket_number = sr_ticket_number + AND sr_returned_date_sk = d2.d_date_sk + AND d2.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') + AND sr_customer_sk = cs_bill_customer_sk + AND sr_item_sk = cs_item_sk + AND cs_sold_date_sk = d3.d_date_sk + AND d3.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') +GROUP BY i_item_id, i_item_desc, s_state +ORDER BY i_item_id, i_item_desc, s_state +LIMIT 100 diff --git
a/core/src/test/resources/tpcds-queries-double/q18.sql b/core/src/test/resources/tpcds-queries-double/q18.sql new file mode 100755 index 000000000..567d4da66 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q18.sql @@ -0,0 +1,28 @@ +SELECT + i_item_id, + ca_country, + ca_state, + ca_county, + avg(cast(cs_quantity AS DOUBLE)) agg1, + avg(cast(cs_list_price AS DOUBLE)) agg2, + avg(cast(cs_coupon_amt AS DOUBLE)) agg3, + avg(cast(cs_sales_price AS DOUBLE)) agg4, + avg(cast(cs_net_profit AS DOUBLE)) agg5, + avg(cast(c_birth_year AS DOUBLE)) agg6, + avg(cast(cd1.cd_dep_count AS DOUBLE)) agg7 +FROM catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item +WHERE cs_sold_date_sk = d_date_sk AND + cs_item_sk = i_item_sk AND + cs_bill_cdemo_sk = cd1.cd_demo_sk AND + cs_bill_customer_sk = c_customer_sk AND + cd1.cd_gender = 'F' AND + cd1.cd_education_status = 'Unknown' AND + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_addr_sk = ca_address_sk AND + c_birth_month IN (1, 6, 8, 9, 12, 2) AND + d_year = 1998 AND + ca_state IN ('MS', 'IN', 'ND', 'OK', 'NM', 'VA', 'MS') +GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county) +ORDER BY ca_country, ca_state, ca_county, i_item_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q19.sql b/core/src/test/resources/tpcds-queries-double/q19.sql new file mode 100755 index 000000000..e38ab7f26 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q19.sql @@ -0,0 +1,19 @@ +SELECT + i_brand_id brand_id, + i_brand brand, + i_manufact_id, + i_manufact, + sum(ss_ext_sales_price) ext_price +FROM date_dim, store_sales, item, customer, customer_address, store +WHERE d_date_sk = ss_sold_date_sk + AND ss_item_sk = i_item_sk + AND i_manager_id = 8 + AND d_moy = 11 + AND d_year = 1998 + AND ss_customer_sk = c_customer_sk + AND c_current_addr_sk = ca_address_sk + AND substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) + AND ss_store_sk = s_store_sk 
+GROUP BY i_brand, i_brand_id, i_manufact_id, i_manufact +ORDER BY ext_price DESC, brand, brand_id, i_manufact_id, i_manufact +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q2.sql b/core/src/test/resources/tpcds-queries-double/q2.sql new file mode 100755 index 000000000..52c0e90c4 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q2.sql @@ -0,0 +1,81 @@ +WITH wscs AS +( SELECT + sold_date_sk, + sales_price + FROM (SELECT + ws_sold_date_sk sold_date_sk, + ws_ext_sales_price sales_price + FROM web_sales) x + UNION ALL + (SELECT + cs_sold_date_sk sold_date_sk, + cs_ext_sales_price sales_price + FROM catalog_sales)), + wswscs AS + ( SELECT + d_week_seq, + sum(CASE WHEN (d_day_name = 'Sunday') + THEN sales_price + ELSE NULL END) + sun_sales, + sum(CASE WHEN (d_day_name = 'Monday') + THEN sales_price + ELSE NULL END) + mon_sales, + sum(CASE WHEN (d_day_name = 'Tuesday') + THEN sales_price + ELSE NULL END) + tue_sales, + sum(CASE WHEN (d_day_name = 'Wednesday') + THEN sales_price + ELSE NULL END) + wed_sales, + sum(CASE WHEN (d_day_name = 'Thursday') + THEN sales_price + ELSE NULL END) + thu_sales, + sum(CASE WHEN (d_day_name = 'Friday') + THEN sales_price + ELSE NULL END) + fri_sales, + sum(CASE WHEN (d_day_name = 'Saturday') + THEN sales_price + ELSE NULL END) + sat_sales + FROM wscs, date_dim + WHERE d_date_sk = sold_date_sk + GROUP BY d_week_seq) +SELECT + d_week_seq1, + round(sun_sales1 / sun_sales2, 2), + round(mon_sales1 / mon_sales2, 2), + round(tue_sales1 / tue_sales2, 2), + round(wed_sales1 / wed_sales2, 2), + round(thu_sales1 / thu_sales2, 2), + round(fri_sales1 / fri_sales2, 2), + round(sat_sales1 / sat_sales2, 2) +FROM + (SELECT + wswscs.d_week_seq d_week_seq1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 + FROM wswscs, date_dim + WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y, + 
(SELECT + wswscs.d_week_seq d_week_seq2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + FROM wswscs, date_dim + WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1) z +WHERE d_week_seq1 = d_week_seq2 - 53 +ORDER BY d_week_seq1 diff --git a/core/src/test/resources/tpcds-queries-double/q20.sql b/core/src/test/resources/tpcds-queries-double/q20.sql new file mode 100755 index 000000000..7ac6c7a75 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q20.sql @@ -0,0 +1,18 @@ +SELECT + i_item_desc, + i_category, + i_class, + i_current_price, + sum(cs_ext_sales_price) AS itemrevenue, + sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price)) + OVER + (PARTITION BY i_class) AS revenueratio +FROM catalog_sales, item, date_dim +WHERE cs_item_sk = i_item_sk + AND i_category IN ('Sports', 'Books', 'Home') + AND cs_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('1999-02-22' AS DATE) +AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) +GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price +ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q21.sql b/core/src/test/resources/tpcds-queries-double/q21.sql new file mode 100755 index 000000000..550881143 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q21.sql @@ -0,0 +1,25 @@ +SELECT * +FROM ( + SELECT + w_warehouse_name, + i_item_id, + sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) + THEN inv_quantity_on_hand + ELSE 0 END) AS inv_before, + sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) + THEN inv_quantity_on_hand + ELSE 0 END) AS inv_after + FROM inventory, warehouse, item, date_dim + WHERE i_current_price BETWEEN 0.99 AND 1.49 + AND i_item_sk = inv_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND inv_date_sk = 
d_date_sk + AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) + AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) + GROUP BY w_warehouse_name, i_item_id) x +WHERE (CASE WHEN inv_before > 0 + THEN inv_after / inv_before + ELSE NULL + END) BETWEEN 2.0 / 3.0 AND 3.0 / 2.0 +ORDER BY w_warehouse_name, i_item_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q22.sql b/core/src/test/resources/tpcds-queries-double/q22.sql new file mode 100755 index 000000000..add3b41f7 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q22.sql @@ -0,0 +1,14 @@ +SELECT + i_product_name, + i_brand, + i_class, + i_category, + avg(inv_quantity_on_hand) qoh +FROM inventory, date_dim, item, warehouse +WHERE inv_date_sk = d_date_sk + AND inv_item_sk = i_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 +GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category) +ORDER BY qoh, i_product_name, i_brand, i_class, i_category +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q23a.sql b/core/src/test/resources/tpcds-queries-double/q23a.sql new file mode 100755 index 000000000..37791f643 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q23a.sql @@ -0,0 +1,53 @@ +WITH frequent_ss_items AS +(SELECT + substr(i_item_desc, 1, 30) itemdesc, + i_item_sk item_sk, + d_date solddate, + count(*) cnt + FROM store_sales, date_dim, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date + HAVING count(*) > 4), + max_store_sales AS + (SELECT max(csales) tpcds_cmax + FROM (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) csales + FROM store_sales, customer, date_dim + WHERE ss_customer_sk = c_customer_sk + AND ss_sold_date_sk = d_date_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY c_customer_sk) x), + best_ss_customer AS + (SELECT + 
c_customer_sk, + sum(ss_quantity * ss_sales_price) ssales + FROM store_sales, customer + WHERE ss_customer_sk = c_customer_sk + GROUP BY c_customer_sk + HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * + (SELECT * + FROM max_store_sales)) +SELECT sum(sales) +FROM ((SELECT cs_quantity * cs_list_price sales +FROM catalog_sales, date_dim +WHERE d_year = 2000 + AND d_moy = 2 + AND cs_sold_date_sk = d_date_sk + AND cs_item_sk IN (SELECT item_sk +FROM frequent_ss_items) + AND cs_bill_customer_sk IN (SELECT c_customer_sk +FROM best_ss_customer)) + UNION ALL + (SELECT ws_quantity * ws_list_price sales + FROM web_sales, date_dim + WHERE d_year = 2000 + AND d_moy = 2 + AND ws_sold_date_sk = d_date_sk + AND ws_item_sk IN (SELECT item_sk + FROM frequent_ss_items) + AND ws_bill_customer_sk IN (SELECT c_customer_sk + FROM best_ss_customer))) y +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q23b.sql b/core/src/test/resources/tpcds-queries-double/q23b.sql new file mode 100755 index 000000000..01150197a --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q23b.sql @@ -0,0 +1,68 @@ +WITH frequent_ss_items AS +(SELECT + substr(i_item_desc, 1, 30) itemdesc, + i_item_sk item_sk, + d_date solddate, + count(*) cnt + FROM store_sales, date_dim, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date + HAVING count(*) > 4), + max_store_sales AS + (SELECT max(csales) tpcds_cmax + FROM (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) csales + FROM store_sales, customer, date_dim + WHERE ss_customer_sk = c_customer_sk + AND ss_sold_date_sk = d_date_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY c_customer_sk) x), + best_ss_customer AS + (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) ssales + FROM store_sales + , customer + WHERE ss_customer_sk = c_customer_sk + GROUP BY c_customer_sk 
+ HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * + (SELECT * + FROM max_store_sales)) +SELECT + c_last_name, + c_first_name, + sales +FROM ((SELECT + c_last_name, + c_first_name, + sum(cs_quantity * cs_list_price) sales +FROM catalog_sales, customer, date_dim +WHERE d_year = 2000 + AND d_moy = 2 + AND cs_sold_date_sk = d_date_sk + AND cs_item_sk IN (SELECT item_sk +FROM frequent_ss_items) + AND cs_bill_customer_sk IN (SELECT c_customer_sk +FROM best_ss_customer) + AND cs_bill_customer_sk = c_customer_sk +GROUP BY c_last_name, c_first_name) + UNION ALL + (SELECT + c_last_name, + c_first_name, + sum(ws_quantity * ws_list_price) sales + FROM web_sales, customer, date_dim + WHERE d_year = 2000 + AND d_moy = 2 + AND ws_sold_date_sk = d_date_sk + AND ws_item_sk IN (SELECT item_sk + FROM frequent_ss_items) + AND ws_bill_customer_sk IN (SELECT c_customer_sk + FROM best_ss_customer) + AND ws_bill_customer_sk = c_customer_sk + GROUP BY c_last_name, c_first_name)) y +ORDER BY c_last_name, c_first_name, sales +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q24a.sql b/core/src/test/resources/tpcds-queries-double/q24a.sql new file mode 100755 index 000000000..bcc189486 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q24a.sql @@ -0,0 +1,34 @@ +WITH ssales AS +(SELECT + c_last_name, + c_first_name, + s_store_name, + ca_state, + s_state, + i_color, + i_current_price, + i_manager_id, + i_units, + i_size, + sum(ss_net_paid) netpaid + FROM store_sales, store_returns, store, item, customer, customer_address + WHERE ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk + AND ss_customer_sk = c_customer_sk + AND ss_item_sk = i_item_sk + AND ss_store_sk = s_store_sk + AND c_birth_country = upper(ca_country) + AND s_zip = ca_zip + AND s_market_id = 8 + GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, + i_current_price, i_manager_id, i_units, i_size) +SELECT + c_last_name, + c_first_name, + 
s_store_name, + sum(netpaid) paid +FROM ssales +WHERE i_color = 'pale' +GROUP BY c_last_name, c_first_name, s_store_name +HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) +FROM ssales) diff --git a/core/src/test/resources/tpcds-queries-double/q24b.sql b/core/src/test/resources/tpcds-queries-double/q24b.sql new file mode 100755 index 000000000..830eb670b --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q24b.sql @@ -0,0 +1,34 @@ +WITH ssales AS +(SELECT + c_last_name, + c_first_name, + s_store_name, + ca_state, + s_state, + i_color, + i_current_price, + i_manager_id, + i_units, + i_size, + sum(ss_net_paid) netpaid + FROM store_sales, store_returns, store, item, customer, customer_address + WHERE ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk + AND ss_customer_sk = c_customer_sk + AND ss_item_sk = i_item_sk + AND ss_store_sk = s_store_sk + AND c_birth_country = upper(ca_country) + AND s_zip = ca_zip + AND s_market_id = 8 + GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, + i_color, i_current_price, i_manager_id, i_units, i_size) +SELECT + c_last_name, + c_first_name, + s_store_name, + sum(netpaid) paid +FROM ssales +WHERE i_color = 'chiffon' +GROUP BY c_last_name, c_first_name, s_store_name +HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) +FROM ssales) diff --git a/core/src/test/resources/tpcds-queries-double/q25.sql b/core/src/test/resources/tpcds-queries-double/q25.sql new file mode 100755 index 000000000..a4d78a3c5 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q25.sql @@ -0,0 +1,33 @@ +SELECT + i_item_id, + i_item_desc, + s_store_id, + s_store_name, + sum(ss_net_profit) AS store_sales_profit, + sum(sr_net_loss) AS store_returns_loss, + sum(cs_net_profit) AS catalog_sales_profit +FROM + store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, + store, item +WHERE + d1.d_moy = 4 + AND d1.d_year = 2001 + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk 
+ AND s_store_sk = ss_store_sk + AND ss_customer_sk = sr_customer_sk + AND ss_item_sk = sr_item_sk + AND ss_ticket_number = sr_ticket_number + AND sr_returned_date_sk = d2.d_date_sk + AND d2.d_moy BETWEEN 4 AND 10 + AND d2.d_year = 2001 + AND sr_customer_sk = cs_bill_customer_sk + AND sr_item_sk = cs_item_sk + AND cs_sold_date_sk = d3.d_date_sk + AND d3.d_moy BETWEEN 4 AND 10 + AND d3.d_year = 2001 +GROUP BY + i_item_id, i_item_desc, s_store_id, s_store_name +ORDER BY + i_item_id, i_item_desc, s_store_id, s_store_name +LIMIT 100 \ No newline at end of file diff --git a/core/src/test/resources/tpcds-queries-double/q26.sql b/core/src/test/resources/tpcds-queries-double/q26.sql new file mode 100755 index 000000000..6d395a1d7 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q26.sql @@ -0,0 +1,19 @@ +SELECT + i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 +FROM catalog_sales, customer_demographics, date_dim, item, promotion +WHERE cs_sold_date_sk = d_date_sk AND + cs_item_sk = i_item_sk AND + cs_bill_cdemo_sk = cd_demo_sk AND + cs_promo_sk = p_promo_sk AND + cd_gender = 'M' AND + cd_marital_status = 'S' AND + cd_education_status = 'College' AND + (p_channel_email = 'N' OR p_channel_event = 'N') AND + d_year = 2000 +GROUP BY i_item_id +ORDER BY i_item_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q27.sql b/core/src/test/resources/tpcds-queries-double/q27.sql new file mode 100755 index 000000000..b0e2fd95f --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q27.sql @@ -0,0 +1,21 @@ +SELECT + i_item_id, + s_state, + grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 +FROM store_sales, customer_demographics, date_dim, store, item +WHERE ss_sold_date_sk = d_date_sk AND + ss_item_sk = i_item_sk AND + ss_store_sk = s_store_sk AND + ss_cdemo_sk = cd_demo_sk AND + cd_gender = 
'M' AND + cd_marital_status = 'S' AND + cd_education_status = 'College' AND + d_year = 2002 AND + s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN') +GROUP BY ROLLUP (i_item_id, s_state) +ORDER BY i_item_id, s_state +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q28.sql b/core/src/test/resources/tpcds-queries-double/q28.sql new file mode 100755 index 000000000..f34c2bb0e --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q28.sql @@ -0,0 +1,56 @@ +SELECT * +FROM (SELECT + avg(ss_list_price) B1_LP, + count(ss_list_price) B1_CNT, + count(DISTINCT ss_list_price) B1_CNTD +FROM store_sales +WHERE ss_quantity BETWEEN 0 AND 5 + AND (ss_list_price BETWEEN 8 AND 8 + 10 + OR ss_coupon_amt BETWEEN 459 AND 459 + 1000 + OR ss_wholesale_cost BETWEEN 57 AND 57 + 20)) B1, + (SELECT + avg(ss_list_price) B2_LP, + count(ss_list_price) B2_CNT, + count(DISTINCT ss_list_price) B2_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 6 AND 10 + AND (ss_list_price BETWEEN 90 AND 90 + 10 + OR ss_coupon_amt BETWEEN 2323 AND 2323 + 1000 + OR ss_wholesale_cost BETWEEN 31 AND 31 + 20)) B2, + (SELECT + avg(ss_list_price) B3_LP, + count(ss_list_price) B3_CNT, + count(DISTINCT ss_list_price) B3_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 11 AND 15 + AND (ss_list_price BETWEEN 142 AND 142 + 10 + OR ss_coupon_amt BETWEEN 12214 AND 12214 + 1000 + OR ss_wholesale_cost BETWEEN 79 AND 79 + 20)) B3, + (SELECT + avg(ss_list_price) B4_LP, + count(ss_list_price) B4_CNT, + count(DISTINCT ss_list_price) B4_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 16 AND 20 + AND (ss_list_price BETWEEN 135 AND 135 + 10 + OR ss_coupon_amt BETWEEN 6071 AND 6071 + 1000 + OR ss_wholesale_cost BETWEEN 38 AND 38 + 20)) B4, + (SELECT + avg(ss_list_price) B5_LP, + count(ss_list_price) B5_CNT, + count(DISTINCT ss_list_price) B5_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 25 + AND (ss_list_price BETWEEN 122 AND 122 + 10 + OR ss_coupon_amt BETWEEN 836 AND 836 + 1000 + OR 
ss_wholesale_cost BETWEEN 17 AND 17 + 20)) B5, + (SELECT + avg(ss_list_price) B6_LP, + count(ss_list_price) B6_CNT, + count(DISTINCT ss_list_price) B6_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 26 AND 30 + AND (ss_list_price BETWEEN 154 AND 154 + 10 + OR ss_coupon_amt BETWEEN 7326 AND 7326 + 1000 + OR ss_wholesale_cost BETWEEN 7 AND 7 + 20)) B6 +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q29.sql b/core/src/test/resources/tpcds-queries-double/q29.sql new file mode 100755 index 000000000..3f1fd553f --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q29.sql @@ -0,0 +1,32 @@ +SELECT + i_item_id, + i_item_desc, + s_store_id, + s_store_name, + sum(ss_quantity) AS store_sales_quantity, + sum(sr_return_quantity) AS store_returns_quantity, + sum(cs_quantity) AS catalog_sales_quantity +FROM + store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, + date_dim d3, store, item +WHERE + d1.d_moy = 9 + AND d1.d_year = 1999 + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND ss_customer_sk = sr_customer_sk + AND ss_item_sk = sr_item_sk + AND ss_ticket_number = sr_ticket_number + AND sr_returned_date_sk = d2.d_date_sk + AND d2.d_moy BETWEEN 9 AND 9 + 3 + AND d2.d_year = 1999 + AND sr_customer_sk = cs_bill_customer_sk + AND sr_item_sk = cs_item_sk + AND cs_sold_date_sk = d3.d_date_sk + AND d3.d_year IN (1999, 1999 + 1, 1999 + 2) +GROUP BY + i_item_id, i_item_desc, s_store_id, s_store_name +ORDER BY + i_item_id, i_item_desc, s_store_id, s_store_name +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q3.sql b/core/src/test/resources/tpcds-queries-double/q3.sql new file mode 100755 index 000000000..181509df9 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q3.sql @@ -0,0 +1,13 @@ +SELECT + dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + SUM(ss_ext_sales_price) sum_agg +FROM date_dim dt, store_sales, item +WHERE dt.d_date_sk = 
store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manufact_id = 128 + AND dt.d_moy = 11 +GROUP BY dt.d_year, item.i_brand, item.i_brand_id +ORDER BY dt.d_year, sum_agg DESC, brand_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q30.sql b/core/src/test/resources/tpcds-queries-double/q30.sql new file mode 100755 index 000000000..986bef566 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q30.sql @@ -0,0 +1,35 @@ +WITH customer_total_return AS +(SELECT + wr_returning_customer_sk AS ctr_customer_sk, + ca_state AS ctr_state, + sum(wr_return_amt) AS ctr_total_return + FROM web_returns, date_dim, customer_address + WHERE wr_returned_date_sk = d_date_sk + AND d_year = 2002 + AND wr_returning_addr_sk = ca_address_sk + GROUP BY wr_returning_customer_sk, ca_state) +SELECT + c_customer_id, + c_salutation, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_day, + c_birth_month, + c_birth_year, + c_birth_country, + c_login, + c_email_address, + c_last_review_date, + ctr_total_return +FROM customer_total_return ctr1, customer_address, customer +WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 +FROM customer_total_return ctr2 +WHERE ctr1.ctr_state = ctr2.ctr_state) + AND ca_address_sk = c_current_addr_sk + AND ca_state = 'GA' + AND ctr1.ctr_customer_sk = c_customer_sk +ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag + , c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address + , c_last_review_date, ctr_total_return +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q31.sql b/core/src/test/resources/tpcds-queries-double/q31.sql new file mode 100755 index 000000000..3e543d543 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q31.sql @@ -0,0 +1,60 @@ +WITH ss AS +(SELECT + ca_county, + d_qoy, + d_year, + sum(ss_ext_sales_price) AS store_sales + FROM store_sales, date_dim, 
customer_address + WHERE ss_sold_date_sk = d_date_sk + AND ss_addr_sk = ca_address_sk + GROUP BY ca_county, d_qoy, d_year), + ws AS + (SELECT + ca_county, + d_qoy, + d_year, + sum(ws_ext_sales_price) AS web_sales + FROM web_sales, date_dim, customer_address + WHERE ws_sold_date_sk = d_date_sk + AND ws_bill_addr_sk = ca_address_sk + GROUP BY ca_county, d_qoy, d_year) +SELECT + ss1.ca_county, + ss1.d_year, + ws2.web_sales / ws1.web_sales web_q1_q2_increase, + ss2.store_sales / ss1.store_sales store_q1_q2_increase, + ws3.web_sales / ws2.web_sales web_q2_q3_increase, + ss3.store_sales / ss2.store_sales store_q2_q3_increase +FROM + ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3 +WHERE + ss1.d_qoy = 1 + AND ss1.d_year = 2000 + AND ss1.ca_county = ss2.ca_county + AND ss2.d_qoy = 2 + AND ss2.d_year = 2000 + AND ss2.ca_county = ss3.ca_county + AND ss3.d_qoy = 3 + AND ss3.d_year = 2000 + AND ss1.ca_county = ws1.ca_county + AND ws1.d_qoy = 1 + AND ws1.d_year = 2000 + AND ws1.ca_county = ws2.ca_county + AND ws2.d_qoy = 2 + AND ws2.d_year = 2000 + AND ws1.ca_county = ws3.ca_county + AND ws3.d_qoy = 3 + AND ws3.d_year = 2000 + AND CASE WHEN ws1.web_sales > 0 + THEN ws2.web_sales / ws1.web_sales + ELSE NULL END + > CASE WHEN ss1.store_sales > 0 + THEN ss2.store_sales / ss1.store_sales + ELSE NULL END + AND CASE WHEN ws2.web_sales > 0 + THEN ws3.web_sales / ws2.web_sales + ELSE NULL END + > CASE WHEN ss2.store_sales > 0 + THEN ss3.store_sales / ss2.store_sales + ELSE NULL END +ORDER BY ss1.ca_county diff --git a/core/src/test/resources/tpcds-queries-double/q32.sql b/core/src/test/resources/tpcds-queries-double/q32.sql new file mode 100755 index 000000000..a6f59ecb8 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q32.sql @@ -0,0 +1,15 @@ +SELECT 1 AS `excess discount amount ` +FROM + catalog_sales, item, date_dim +WHERE + i_manufact_id = 977 + AND i_item_sk = cs_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) + AND 
d_date_sk = cs_sold_date_sk + AND cs_ext_discount_amt > ( + SELECT 1.3 * avg(cs_ext_discount_amt) + FROM catalog_sales, date_dim + WHERE cs_item_sk = i_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) + AND d_date_sk = cs_sold_date_sk) +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q33.sql b/core/src/test/resources/tpcds-queries-double/q33.sql new file mode 100755 index 000000000..d24856aa5 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q33.sql @@ -0,0 +1,65 @@ +WITH ss AS ( + SELECT + i_manufact_id, + sum(ss_ext_sales_price) total_sales + FROM + store_sales, date_dim, customer_address, item + WHERE + i_manufact_id IN (SELECT i_manufact_id + FROM item + WHERE i_category IN ('Electronics')) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 5 + AND ss_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_manufact_id), cs AS +(SELECT + i_manufact_id, + sum(cs_ext_sales_price) total_sales + FROM catalog_sales, date_dim, customer_address, item + WHERE + i_manufact_id IN ( + SELECT i_manufact_id + FROM item + WHERE + i_category IN ('Electronics')) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 5 + AND cs_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_manufact_id), + ws AS ( + SELECT + i_manufact_id, + sum(ws_ext_sales_price) total_sales + FROM + web_sales, date_dim, customer_address, item + WHERE + i_manufact_id IN (SELECT i_manufact_id + FROM item + WHERE i_category IN ('Electronics')) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 5 + AND ws_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_manufact_id) +SELECT + i_manufact_id, + sum(total_sales) total_sales +FROM (SELECT * + FROM ss + UNION ALL + SELECT * + FROM cs + UNION ALL + SELECT * + FROM ws) tmp1 +GROUP BY i_manufact_id +ORDER BY 
total_sales +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q34.sql b/core/src/test/resources/tpcds-queries-double/q34.sql new file mode 100755 index 000000000..33396bf16 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q34.sql @@ -0,0 +1,32 @@ +SELECT + c_last_name, + c_first_name, + c_salutation, + c_preferred_cust_flag, + ss_ticket_number, + cnt +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + count(*) cnt + FROM store_sales, date_dim, store, household_demographics + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND (date_dim.d_dom BETWEEN 1 AND 3 OR date_dim.d_dom BETWEEN 25 AND 28) + AND (household_demographics.hd_buy_potential = '>10000' OR + household_demographics.hd_buy_potential = 'unknown') + AND household_demographics.hd_vehicle_count > 0 + AND (CASE WHEN household_demographics.hd_vehicle_count > 0 + THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count + ELSE NULL + END) > 1.2 + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_county IN + ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', + 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') + GROUP BY ss_ticket_number, ss_customer_sk) dn, customer +WHERE ss_customer_sk = c_customer_sk + AND cnt BETWEEN 15 AND 20 +ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC diff --git a/core/src/test/resources/tpcds-queries-double/q35.sql b/core/src/test/resources/tpcds-queries-double/q35.sql new file mode 100755 index 000000000..cfe4342d8 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q35.sql @@ -0,0 +1,46 @@ +SELECT + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + min(cd_dep_count), + max(cd_dep_count), + avg(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + 
min(cd_dep_employed_count), + max(cd_dep_employed_count), + avg(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + min(cd_dep_college_count), + max(cd_dep_college_count), + avg(cd_dep_college_count) +FROM + customer c, customer_address ca, customer_demographics +WHERE + c.c_current_addr_sk = ca.ca_address_sk AND + cd_demo_sk = c.c_current_cdemo_sk AND + exists(SELECT * + FROM store_sales, date_dim + WHERE c.c_customer_sk = ss_customer_sk AND + ss_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_qoy < 4) AND + (exists(SELECT * + FROM web_sales, date_dim + WHERE c.c_customer_sk = ws_bill_customer_sk AND + ws_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_qoy < 4) OR + exists(SELECT * + FROM catalog_sales, date_dim + WHERE c.c_customer_sk = cs_ship_customer_sk AND + cs_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_qoy < 4)) +GROUP BY ca_state, cd_gender, cd_marital_status, cd_dep_count, + cd_dep_employed_count, cd_dep_college_count +ORDER BY ca_state, cd_gender, cd_marital_status, cd_dep_count, + cd_dep_employed_count, cd_dep_college_count +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q36.sql b/core/src/test/resources/tpcds-queries-double/q36.sql new file mode 100755 index 000000000..a8f93df76 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q36.sql @@ -0,0 +1,26 @@ +SELECT + sum(ss_net_profit) / sum(ss_ext_sales_price) AS gross_margin, + i_category, + i_class, + grouping(i_category) + grouping(i_class) AS lochierarchy, + rank() + OVER ( + PARTITION BY grouping(i_category) + grouping(i_class), + CASE WHEN grouping(i_class) = 0 + THEN i_category END + ORDER BY sum(ss_net_profit) / sum(ss_ext_sales_price) ASC) AS rank_within_parent +FROM + store_sales, date_dim d1, item, store +WHERE + d1.d_year = 2001 + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN') +GROUP BY ROLLUP (i_category, 
i_class) +ORDER BY + lochierarchy DESC + , CASE WHEN lochierarchy = 0 + THEN i_category END + , rank_within_parent +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q37.sql b/core/src/test/resources/tpcds-queries-double/q37.sql new file mode 100755 index 000000000..11b3821fa --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q37.sql @@ -0,0 +1,15 @@ +SELECT + i_item_id, + i_item_desc, + i_current_price +FROM item, inventory, date_dim, catalog_sales +WHERE i_current_price BETWEEN 68 AND 68 + 30 + AND inv_item_sk = i_item_sk + AND d_date_sk = inv_date_sk + AND d_date BETWEEN cast('2000-02-01' AS DATE) AND (cast('2000-02-01' AS DATE) + INTERVAL 60 days) + AND i_manufact_id IN (677, 940, 694, 808) + AND inv_quantity_on_hand BETWEEN 100 AND 500 + AND cs_item_sk = i_item_sk +GROUP BY i_item_id, i_item_desc, i_current_price +ORDER BY i_item_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q38.sql b/core/src/test/resources/tpcds-queries-double/q38.sql new file mode 100755 index 000000000..1c8d53ee2 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q38.sql @@ -0,0 +1,30 @@ +SELECT count(*) +FROM ( + SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM store_sales, date_dim, customer + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + INTERSECT + SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM catalog_sales, date_dim, customer + WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + INTERSECT + SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM web_sales, date_dim, customer + WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk + AND web_sales.ws_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + ) hot_cust 
+LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q39a.sql b/core/src/test/resources/tpcds-queries-double/q39a.sql new file mode 100755 index 000000000..9fc4c1701 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q39a.sql @@ -0,0 +1,47 @@ +WITH inv AS +(SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stdev, + mean, + CASE mean + WHEN 0 + THEN NULL + ELSE stdev / mean END cov + FROM (SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stddev_samp(inv_quantity_on_hand) stdev, + avg(inv_quantity_on_hand) mean + FROM inventory, item, warehouse, date_dim + WHERE inv_item_sk = i_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND inv_date_sk = d_date_sk + AND d_year = 2001 + GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo + WHERE CASE mean + WHEN 0 + THEN 0 + ELSE stdev / mean END > 1) +SELECT + inv1.w_warehouse_sk, + inv1.i_item_sk, + inv1.d_moy, + inv1.mean, + inv1.cov, + inv2.w_warehouse_sk, + inv2.i_item_sk, + inv2.d_moy, + inv2.mean, + inv2.cov +FROM inv inv1, inv inv2 +WHERE inv1.i_item_sk = inv2.i_item_sk + AND inv1.w_warehouse_sk = inv2.w_warehouse_sk + AND inv1.d_moy = 1 + AND inv2.d_moy = 1 + 1 +ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov + , inv2.d_moy, inv2.mean, inv2.cov diff --git a/core/src/test/resources/tpcds-queries-double/q39b.sql b/core/src/test/resources/tpcds-queries-double/q39b.sql new file mode 100755 index 000000000..6f8493029 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q39b.sql @@ -0,0 +1,48 @@ +WITH inv AS +(SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stdev, + mean, + CASE mean + WHEN 0 + THEN NULL + ELSE stdev / mean END cov + FROM (SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stddev_samp(inv_quantity_on_hand) stdev, + avg(inv_quantity_on_hand) mean + FROM inventory, item, warehouse, date_dim + WHERE inv_item_sk = i_item_sk + AND inv_warehouse_sk = 
w_warehouse_sk + AND inv_date_sk = d_date_sk + AND d_year = 2001 + GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo + WHERE CASE mean + WHEN 0 + THEN 0 + ELSE stdev / mean END > 1) +SELECT + inv1.w_warehouse_sk, + inv1.i_item_sk, + inv1.d_moy, + inv1.mean, + inv1.cov, + inv2.w_warehouse_sk, + inv2.i_item_sk, + inv2.d_moy, + inv2.mean, + inv2.cov +FROM inv inv1, inv inv2 +WHERE inv1.i_item_sk = inv2.i_item_sk + AND inv1.w_warehouse_sk = inv2.w_warehouse_sk + AND inv1.d_moy = 1 + AND inv2.d_moy = 1 + 1 + AND inv1.cov > 1.5 +ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov + , inv2.d_moy, inv2.mean, inv2.cov diff --git a/core/src/test/resources/tpcds-queries-double/q4.sql b/core/src/test/resources/tpcds-queries-double/q4.sql new file mode 100755 index 000000000..b9f27fbc9 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q4.sql @@ -0,0 +1,120 @@ +WITH year_total AS ( + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum(((ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt) + + ss_ext_sales_price) / 2) year_total, + 's' sale_type + FROM customer, store_sales, date_dim + WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum((((cs_ext_list_price - cs_ext_wholesale_cost - cs_ext_discount_amt) + + 
cs_ext_sales_price) / 2)) year_total, + 'c' sale_type + FROM customer, catalog_sales, date_dim + WHERE c_customer_sk = cs_bill_customer_sk AND cs_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum((((ws_ext_list_price - ws_ext_wholesale_cost - ws_ext_discount_amt) + ws_ext_sales_price) / + 2)) year_total, + 'w' sale_type + FROM customer, web_sales, date_dim + WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year) +SELECT + t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name, + t_s_secyear.customer_preferred_cust_flag, + t_s_secyear.customer_birth_country, + t_s_secyear.customer_login, + t_s_secyear.customer_email_address +FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_c_firstyear, + year_total t_c_secyear, year_total t_w_firstyear, year_total t_w_secyear +WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id + AND t_s_firstyear.customer_id = t_c_secyear.customer_id + AND t_s_firstyear.customer_id = t_c_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_secyear.customer_id + AND t_s_firstyear.sale_type = 's' + AND t_c_firstyear.sale_type = 'c' + AND t_w_firstyear.sale_type = 'w' + AND t_s_secyear.sale_type = 's' + AND t_c_secyear.sale_type = 'c' + AND t_w_secyear.sale_type = 'w' + AND t_s_firstyear.dyear = 2001 + AND t_s_secyear.dyear = 2001 + 1 + AND 
t_c_firstyear.dyear = 2001 + AND t_c_secyear.dyear = 2001 + 1 + AND t_w_firstyear.dyear = 2001 + AND t_w_secyear.dyear = 2001 + 1 + AND t_s_firstyear.year_total > 0 + AND t_c_firstyear.year_total > 0 + AND t_w_firstyear.year_total > 0 + AND CASE WHEN t_c_firstyear.year_total > 0 + THEN t_c_secyear.year_total / t_c_firstyear.year_total + ELSE NULL END + > CASE WHEN t_s_firstyear.year_total > 0 + THEN t_s_secyear.year_total / t_s_firstyear.year_total + ELSE NULL END + AND CASE WHEN t_c_firstyear.year_total > 0 + THEN t_c_secyear.year_total / t_c_firstyear.year_total + ELSE NULL END + > CASE WHEN t_w_firstyear.year_total > 0 + THEN t_w_secyear.year_total / t_w_firstyear.year_total + ELSE NULL END +ORDER BY + t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name, + t_s_secyear.customer_preferred_cust_flag, + t_s_secyear.customer_birth_country, + t_s_secyear.customer_login, + t_s_secyear.customer_email_address +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q40.sql b/core/src/test/resources/tpcds-queries-double/q40.sql new file mode 100755 index 000000000..66d8b73ac --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q40.sql @@ -0,0 +1,25 @@ +SELECT + w_state, + i_item_id, + sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) + THEN cs_sales_price - coalesce(cr_refunded_cash, 0) + ELSE 0 END) AS sales_before, + sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) + THEN cs_sales_price - coalesce(cr_refunded_cash, 0) + ELSE 0 END) AS sales_after +FROM + catalog_sales + LEFT OUTER JOIN catalog_returns ON + (cs_order_number = cr_order_number + AND cs_item_sk = cr_item_sk) + , warehouse, item, date_dim +WHERE + i_current_price BETWEEN 0.99 AND 1.49 + AND i_item_sk = cs_item_sk + AND cs_warehouse_sk = w_warehouse_sk + AND cs_sold_date_sk = d_date_sk + AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) + AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) 
+GROUP BY w_state, i_item_id +ORDER BY w_state, i_item_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q41.sql b/core/src/test/resources/tpcds-queries-double/q41.sql new file mode 100755 index 000000000..25e317e0e --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q41.sql @@ -0,0 +1,49 @@ +SELECT DISTINCT (i_product_name) +FROM item i1 +WHERE i_manufact_id BETWEEN 738 AND 738 + 40 + AND (SELECT count(*) AS item_cnt +FROM item +WHERE (i_manufact = i1.i_manufact AND + ((i_category = 'Women' AND + (i_color = 'powder' OR i_color = 'khaki') AND + (i_units = 'Ounce' OR i_units = 'Oz') AND + (i_size = 'medium' OR i_size = 'extra large') + ) OR + (i_category = 'Women' AND + (i_color = 'brown' OR i_color = 'honeydew') AND + (i_units = 'Bunch' OR i_units = 'Ton') AND + (i_size = 'N/A' OR i_size = 'small') + ) OR + (i_category = 'Men' AND + (i_color = 'floral' OR i_color = 'deep') AND + (i_units = 'N/A' OR i_units = 'Dozen') AND + (i_size = 'petite' OR i_size = 'large') + ) OR + (i_category = 'Men' AND + (i_color = 'light' OR i_color = 'cornflower') AND + (i_units = 'Box' OR i_units = 'Pound') AND + (i_size = 'medium' OR i_size = 'extra large') + ))) OR + (i_manufact = i1.i_manufact AND + ((i_category = 'Women' AND + (i_color = 'midnight' OR i_color = 'snow') AND + (i_units = 'Pallet' OR i_units = 'Gross') AND + (i_size = 'medium' OR i_size = 'extra large') + ) OR + (i_category = 'Women' AND + (i_color = 'cyan' OR i_color = 'papaya') AND + (i_units = 'Cup' OR i_units = 'Dram') AND + (i_size = 'N/A' OR i_size = 'small') + ) OR + (i_category = 'Men' AND + (i_color = 'orange' OR i_color = 'frosted') AND + (i_units = 'Each' OR i_units = 'Tbl') AND + (i_size = 'petite' OR i_size = 'large') + ) OR + (i_category = 'Men' AND + (i_color = 'forest' OR i_color = 'ghost') AND + (i_units = 'Lb' OR i_units = 'Bundle') AND + (i_size = 'medium' OR i_size = 'extra large') + )))) > 0 +ORDER BY i_product_name +LIMIT 100 diff --git 
a/core/src/test/resources/tpcds-queries-double/q42.sql b/core/src/test/resources/tpcds-queries-double/q42.sql new file mode 100755 index 000000000..4d2e71760 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q42.sql @@ -0,0 +1,18 @@ +SELECT + dt.d_year, + item.i_category_id, + item.i_category, + sum(ss_ext_sales_price) +FROM date_dim dt, store_sales, item +WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manager_id = 1 + AND dt.d_moy = 11 + AND dt.d_year = 2000 +GROUP BY dt.d_year + , item.i_category_id + , item.i_category +ORDER BY sum(ss_ext_sales_price) DESC, dt.d_year + , item.i_category_id + , item.i_category +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q43.sql b/core/src/test/resources/tpcds-queries-double/q43.sql new file mode 100755 index 000000000..45411772c --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q43.sql @@ -0,0 +1,33 @@ +SELECT + s_store_name, + s_store_id, + sum(CASE WHEN (d_day_name = 'Sunday') + THEN ss_sales_price + ELSE NULL END) sun_sales, + sum(CASE WHEN (d_day_name = 'Monday') + THEN ss_sales_price + ELSE NULL END) mon_sales, + sum(CASE WHEN (d_day_name = 'Tuesday') + THEN ss_sales_price + ELSE NULL END) tue_sales, + sum(CASE WHEN (d_day_name = 'Wednesday') + THEN ss_sales_price + ELSE NULL END) wed_sales, + sum(CASE WHEN (d_day_name = 'Thursday') + THEN ss_sales_price + ELSE NULL END) thu_sales, + sum(CASE WHEN (d_day_name = 'Friday') + THEN ss_sales_price + ELSE NULL END) fri_sales, + sum(CASE WHEN (d_day_name = 'Saturday') + THEN ss_sales_price + ELSE NULL END) sat_sales +FROM date_dim, store_sales, store +WHERE d_date_sk = ss_sold_date_sk AND + s_store_sk = ss_store_sk AND + s_gmt_offset = -5 AND + d_year = 2000 +GROUP BY s_store_name, s_store_id +ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, + thu_sales, fri_sales, sat_sales +LIMIT 100 diff --git 
a/core/src/test/resources/tpcds-queries-double/q44.sql b/core/src/test/resources/tpcds-queries-double/q44.sql new file mode 100755 index 000000000..379e60478 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q44.sql @@ -0,0 +1,46 @@ +SELECT + asceding.rnk, + i1.i_product_name best_performing, + i2.i_product_name worst_performing +FROM (SELECT * +FROM (SELECT + item_sk, + rank() + OVER ( + ORDER BY rank_col ASC) rnk +FROM (SELECT + ss_item_sk item_sk, + avg(ss_net_profit) rank_col +FROM store_sales ss1 +WHERE ss_store_sk = 4 +GROUP BY ss_item_sk +HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col +FROM store_sales +WHERE ss_store_sk = 4 + AND ss_addr_sk IS NULL +GROUP BY ss_store_sk)) V1) V11 +WHERE rnk < 11) asceding, + (SELECT * + FROM (SELECT + item_sk, + rank() + OVER ( + ORDER BY rank_col DESC) rnk + FROM (SELECT + ss_item_sk item_sk, + avg(ss_net_profit) rank_col + FROM store_sales ss1 + WHERE ss_store_sk = 4 + GROUP BY ss_item_sk + HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col + FROM store_sales + WHERE ss_store_sk = 4 + AND ss_addr_sk IS NULL + GROUP BY ss_store_sk)) V2) V21 + WHERE rnk < 11) descending, + item i1, item i2 +WHERE asceding.rnk = descending.rnk + AND i1.i_item_sk = asceding.item_sk + AND i2.i_item_sk = descending.item_sk +ORDER BY asceding.rnk +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q45.sql b/core/src/test/resources/tpcds-queries-double/q45.sql new file mode 100755 index 000000000..907438f19 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q45.sql @@ -0,0 +1,21 @@ +SELECT + ca_zip, + ca_city, + sum(ws_sales_price) +FROM web_sales, customer, customer_address, date_dim, item +WHERE ws_bill_customer_sk = c_customer_sk + AND c_current_addr_sk = ca_address_sk + AND ws_item_sk = i_item_sk + AND (substr(ca_zip, 1, 5) IN + ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') + OR + i_item_id IN (SELECT i_item_id + FROM 
item + WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) +) + AND ws_sold_date_sk = d_date_sk + AND d_qoy = 2 AND d_year = 2001 +GROUP BY ca_zip, ca_city +ORDER BY ca_zip, ca_city +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q46.sql b/core/src/test/resources/tpcds-queries-double/q46.sql new file mode 100755 index 000000000..0911677df --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q46.sql @@ -0,0 +1,32 @@ +SELECT + c_last_name, + c_first_name, + ca_city, + bought_city, + ss_ticket_number, + amt, + profit +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + ca_city bought_city, + sum(ss_coupon_amt) amt, + sum(ss_net_profit) profit + FROM store_sales, date_dim, store, household_demographics, customer_address + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND store_sales.ss_addr_sk = customer_address.ca_address_sk + AND (household_demographics.hd_dep_count = 4 OR + household_demographics.hd_vehicle_count = 3) + AND date_dim.d_dow IN (6, 0) + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_city IN ('Fairview', 'Midway', 'Fairview', 'Fairview', 'Fairview') + GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer, + customer_address current_addr +WHERE ss_customer_sk = c_customer_sk + AND customer.c_current_addr_sk = current_addr.ca_address_sk + AND current_addr.ca_city <> bought_city +ORDER BY c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q47.sql b/core/src/test/resources/tpcds-queries-double/q47.sql new file mode 100755 index 000000000..cfc37a4ce --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q47.sql @@ -0,0 +1,63 @@ +WITH v1 AS ( + SELECT + i_category, + i_brand, + s_store_name, + s_company_name, + d_year, + d_moy, + sum(ss_sales_price) sum_sales, 
+ avg(sum(ss_sales_price)) + OVER + (PARTITION BY i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() + OVER + (PARTITION BY i_category, i_brand, + s_store_name, s_company_name + ORDER BY d_year, d_moy) rn + FROM item, store_sales, date_dim, store + WHERE ss_item_sk = i_item_sk AND + ss_sold_date_sk = d_date_sk AND + ss_store_sk = s_store_sk AND + ( + d_year = 1999 OR + (d_year = 1999 - 1 AND d_moy = 12) OR + (d_year = 1999 + 1 AND d_moy = 1) + ) + GROUP BY i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 AS ( + SELECT + v1.i_category, + v1.i_brand, + v1.s_store_name, + v1.s_company_name, + v1.d_year, + v1.d_moy, + v1.avg_monthly_sales, + v1.sum_sales, + v1_lag.sum_sales psum, + v1_lead.sum_sales nsum + FROM v1, v1 v1_lag, v1 v1_lead + WHERE v1.i_category = v1_lag.i_category AND + v1.i_category = v1_lead.i_category AND + v1.i_brand = v1_lag.i_brand AND + v1.i_brand = v1_lead.i_brand AND + v1.s_store_name = v1_lag.s_store_name AND + v1.s_store_name = v1_lead.s_store_name AND + v1.s_company_name = v1_lag.s_company_name AND + v1.s_company_name = v1_lead.s_company_name AND + v1.rn = v1_lag.rn + 1 AND + v1.rn = v1_lead.rn - 1) +SELECT * +FROM v2 +WHERE d_year = 1999 AND + avg_monthly_sales > 0 AND + CASE WHEN avg_monthly_sales > 0 + THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales + ELSE NULL END > 0.1 +ORDER BY sum_sales - avg_monthly_sales, 3 +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q48.sql b/core/src/test/resources/tpcds-queries-double/q48.sql new file mode 100755 index 000000000..fdb9f38e2 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q48.sql @@ -0,0 +1,63 @@ +SELECT sum(ss_quantity) +FROM store_sales, store, customer_demographics, customer_address, date_dim +WHERE s_store_sk = ss_store_sk + AND ss_sold_date_sk = d_date_sk AND d_year = 2001 + AND + ( + ( + cd_demo_sk = ss_cdemo_sk + AND + cd_marital_status = 'M' + AND + cd_education_status = '4 
yr Degree' + AND + ss_sales_price BETWEEN 100.00 AND 150.00 + ) + OR + ( + cd_demo_sk = ss_cdemo_sk + AND + cd_marital_status = 'D' + AND + cd_education_status = '2 yr Degree' + AND + ss_sales_price BETWEEN 50.00 AND 100.00 + ) + OR + ( + cd_demo_sk = ss_cdemo_sk + AND + cd_marital_status = 'S' + AND + cd_education_status = 'College' + AND + ss_sales_price BETWEEN 150.00 AND 200.00 + ) + ) + AND + ( + ( + ss_addr_sk = ca_address_sk + AND + ca_country = 'United States' + AND + ca_state IN ('CO', 'OH', 'TX') + AND ss_net_profit BETWEEN 0 AND 2000 + ) + OR + (ss_addr_sk = ca_address_sk + AND + ca_country = 'United States' + AND + ca_state IN ('OR', 'MN', 'KY') + AND ss_net_profit BETWEEN 150 AND 3000 + ) + OR + (ss_addr_sk = ca_address_sk + AND + ca_country = 'United States' + AND + ca_state IN ('VA', 'CA', 'MS') + AND ss_net_profit BETWEEN 50 AND 25000 + ) + ) diff --git a/core/src/test/resources/tpcds-queries-double/q49.sql b/core/src/test/resources/tpcds-queries-double/q49.sql new file mode 100755 index 000000000..2f7f497cf --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q49.sql @@ -0,0 +1,126 @@ +SELECT + 'web' AS channel, + web.item, + web.return_ratio, + web.return_rank, + web.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + ws.ws_item_sk AS item, + (cast(sum(coalesce(wr.wr_return_quantity, 0)) AS DOUBLE) / + cast(sum(coalesce(ws.ws_quantity, 0)) AS DOUBLE)) AS return_ratio, + (cast(sum(coalesce(wr.wr_return_amt, 0)) AS DOUBLE) / + cast(sum(coalesce(ws.ws_net_paid, 0)) AS DOUBLE)) AS currency_ratio + FROM + web_sales ws LEFT OUTER JOIN web_returns wr + ON (ws.ws_order_number = wr.wr_order_number AND + ws.ws_item_sk = wr.wr_item_sk) + , date_dim + WHERE + wr.wr_return_amt > 10000 + AND ws.ws_net_profit > 1 + AND ws.ws_net_paid > 0 + AND ws.ws_quantity > 0 + AND ws_sold_date_sk = 
d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY ws.ws_item_sk + ) in_web + ) web +WHERE (web.return_rank <= 10 OR web.currency_rank <= 10) +UNION +SELECT + 'catalog' AS channel, + catalog.item, + catalog.return_ratio, + catalog.return_rank, + catalog.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + cs.cs_item_sk AS item, + (cast(sum(coalesce(cr.cr_return_quantity, 0)) AS DOUBLE) / + cast(sum(coalesce(cs.cs_quantity, 0)) AS DOUBLE)) AS return_ratio, + (cast(sum(coalesce(cr.cr_return_amount, 0)) AS DOUBLE) / + cast(sum(coalesce(cs.cs_net_paid, 0)) AS DOUBLE)) AS currency_ratio + FROM + catalog_sales cs LEFT OUTER JOIN catalog_returns cr + ON (cs.cs_order_number = cr.cr_order_number AND + cs.cs_item_sk = cr.cr_item_sk) + , date_dim + WHERE + cr.cr_return_amount > 10000 + AND cs.cs_net_profit > 1 + AND cs.cs_net_paid > 0 + AND cs.cs_quantity > 0 + AND cs_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY cs.cs_item_sk + ) in_cat + ) catalog +WHERE (catalog.return_rank <= 10 OR catalog.currency_rank <= 10) +UNION +SELECT + 'store' AS channel, + store.item, + store.return_ratio, + store.return_rank, + store.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + sts.ss_item_sk AS item, + (cast(sum(coalesce(sr.sr_return_quantity, 0)) AS DOUBLE) / + cast(sum(coalesce(sts.ss_quantity, 0)) AS DOUBLE)) AS return_ratio, + (cast(sum(coalesce(sr.sr_return_amt, 0)) AS DOUBLE) / + cast(sum(coalesce(sts.ss_net_paid, 0)) AS DOUBLE)) AS currency_ratio + FROM + store_sales sts LEFT OUTER JOIN store_returns sr + ON (sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk) + , date_dim + WHERE + sr.sr_return_amt > 
10000 + AND sts.ss_net_profit > 1 + AND sts.ss_net_paid > 0 + AND sts.ss_quantity > 0 + AND ss_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY sts.ss_item_sk + ) in_store + ) store +WHERE (store.return_rank <= 10 OR store.currency_rank <= 10) +ORDER BY 1, 4, 5 +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q5.sql b/core/src/test/resources/tpcds-queries-double/q5.sql new file mode 100755 index 000000000..e242d008e --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q5.sql @@ -0,0 +1,131 @@ +WITH ssr AS +( SELECT + s_store_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS RETURNS, + sum(net_loss) AS profit_loss + FROM + (SELECT + ss_store_sk AS store_sk, + ss_sold_date_sk AS date_sk, + ss_ext_sales_price AS sales_price, + ss_net_profit AS profit, + cast(0 AS DOUBLE) AS return_amt, + cast(0 AS DOUBLE) AS net_loss + FROM store_sales + UNION ALL + SELECT + sr_store_sk AS store_sk, + sr_returned_date_sk AS date_sk, + cast(0 AS DOUBLE) AS sales_price, + cast(0 AS DOUBLE) AS profit, + sr_return_amt AS return_amt, + sr_net_loss AS net_loss + FROM store_returns) + salesreturns, date_dim, store + WHERE date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) + AND store_sk = s_store_sk + GROUP BY s_store_id), + csr AS + ( SELECT + cp_catalog_page_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS RETURNS, + sum(net_loss) AS profit_loss + FROM + (SELECT + cs_catalog_page_sk AS page_sk, + cs_sold_date_sk AS date_sk, + cs_ext_sales_price AS sales_price, + cs_net_profit AS profit, + cast(0 AS DOUBLE) AS return_amt, + cast(0 AS DOUBLE) AS net_loss + FROM catalog_sales + UNION ALL + SELECT + cr_catalog_page_sk AS page_sk, + cr_returned_date_sk AS date_sk, + cast(0 AS DOUBLE) AS sales_price, + cast(0 AS DOUBLE) AS profit, + cr_return_amount AS return_amt, + cr_net_loss AS net_loss + FROM 
catalog_returns + ) salesreturns, date_dim, catalog_page + WHERE date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) + AND page_sk = cp_catalog_page_sk + GROUP BY cp_catalog_page_id) + , + wsr AS + ( SELECT + web_site_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS RETURNS, + sum(net_loss) AS profit_loss + FROM + (SELECT + ws_web_site_sk AS wsr_web_site_sk, + ws_sold_date_sk AS date_sk, + ws_ext_sales_price AS sales_price, + ws_net_profit AS profit, + cast(0 AS DOUBLE) AS return_amt, + cast(0 AS DOUBLE) AS net_loss + FROM web_sales + UNION ALL + SELECT + ws_web_site_sk AS wsr_web_site_sk, + wr_returned_date_sk AS date_sk, + cast(0 AS DOUBLE) AS sales_price, + cast(0 AS DOUBLE) AS profit, + wr_return_amt AS return_amt, + wr_net_loss AS net_loss + FROM web_returns + LEFT OUTER JOIN web_sales ON + (wr_item_sk = ws_item_sk + AND wr_order_number = ws_order_number) + ) salesreturns, date_dim, web_site + WHERE date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) + AND wsr_web_site_sk = web_site_sk + GROUP BY web_site_id) +SELECT + channel, + id, + sum(sales) AS sales, + sum(returns) AS returns, + sum(profit) AS profit +FROM + (SELECT + 'store channel' AS channel, + concat('store', s_store_id) AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM ssr + UNION ALL + SELECT + 'catalog channel' AS channel, + concat('catalog_page', cp_catalog_page_id) AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM csr + UNION ALL + SELECT + 'web channel' AS channel, + concat('web_site', web_site_id) AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM wsr + ) x +GROUP BY ROLLUP (channel, id) +ORDER BY channel, id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q50.sql b/core/src/test/resources/tpcds-queries-double/q50.sql new file mode 100755 index 
000000000..f1d4b1544 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q50.sql @@ -0,0 +1,47 @@ +SELECT + s_store_name, + s_company_id, + s_street_number, + s_street_name, + s_street_type, + s_suite_number, + s_city, + s_county, + s_state, + s_zip, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk <= 30) + THEN 1 + ELSE 0 END) AS `30 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 30) AND + (sr_returned_date_sk - ss_sold_date_sk <= 60) + THEN 1 + ELSE 0 END) AS `31 - 60 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 60) AND + (sr_returned_date_sk - ss_sold_date_sk <= 90) + THEN 1 + ELSE 0 END) AS `61 - 90 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 90) AND + (sr_returned_date_sk - ss_sold_date_sk <= 120) + THEN 1 + ELSE 0 END) AS `91 - 120 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 120) + THEN 1 + ELSE 0 END) AS `>120 days ` +FROM + store_sales, store_returns, store, date_dim d1, date_dim d2 +WHERE + d2.d_year = 2001 + AND d2.d_moy = 8 + AND ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk + AND ss_sold_date_sk = d1.d_date_sk + AND sr_returned_date_sk = d2.d_date_sk + AND ss_customer_sk = sr_customer_sk + AND ss_store_sk = s_store_sk +GROUP BY + s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, + s_suite_number, s_city, s_county, s_state, s_zip +ORDER BY + s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, + s_suite_number, s_city, s_county, s_state, s_zip +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q51.sql b/core/src/test/resources/tpcds-queries-double/q51.sql new file mode 100755 index 000000000..62b003eb6 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q51.sql @@ -0,0 +1,55 @@ +WITH web_v1 AS ( + SELECT + ws_item_sk item_sk, + d_date, + sum(sum(ws_sales_price)) + OVER (PARTITION BY ws_item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT 
ROW) cume_sales + FROM web_sales, date_dim + WHERE ws_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + AND ws_item_sk IS NOT NULL + GROUP BY ws_item_sk, d_date), + store_v1 AS ( + SELECT + ss_item_sk item_sk, + d_date, + sum(sum(ss_sales_price)) + OVER (PARTITION BY ss_item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + AND ss_item_sk IS NOT NULL + GROUP BY ss_item_sk, d_date) +SELECT * +FROM (SELECT + item_sk, + d_date, + web_sales, + store_sales, + max(web_sales) + OVER (PARTITION BY item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) web_cumulative, + max(store_sales) + OVER (PARTITION BY item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) store_cumulative +FROM (SELECT + CASE WHEN web.item_sk IS NOT NULL + THEN web.item_sk + ELSE store.item_sk END item_sk, + CASE WHEN web.d_date IS NOT NULL + THEN web.d_date + ELSE store.d_date END d_date, + web.cume_sales web_sales, + store.cume_sales store_sales +FROM web_v1 web FULL OUTER JOIN store_v1 store ON (web.item_sk = store.item_sk + AND web.d_date = store.d_date) + ) x) y +WHERE web_cumulative > store_cumulative +ORDER BY item_sk, d_date +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q52.sql b/core/src/test/resources/tpcds-queries-double/q52.sql new file mode 100755 index 000000000..467d1ae05 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q52.sql @@ -0,0 +1,14 @@ +SELECT + dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + sum(ss_ext_sales_price) ext_price +FROM date_dim dt, store_sales, item +WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manager_id = 1 + AND dt.d_moy = 11 + AND dt.d_year = 2000 +GROUP BY dt.d_year, item.i_brand, item.i_brand_id +ORDER BY dt.d_year, ext_price DESC, 
brand_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q53.sql b/core/src/test/resources/tpcds-queries-double/q53.sql new file mode 100755 index 000000000..b42c68dcf --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q53.sql @@ -0,0 +1,30 @@ +SELECT * +FROM + (SELECT + i_manufact_id, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER (PARTITION BY i_manufact_id) avg_quarterly_sales + FROM item, store_sales, date_dim, store + WHERE ss_item_sk = i_item_sk AND + ss_sold_date_sk = d_date_sk AND + ss_store_sk = s_store_sk AND + d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, + 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) AND + ((i_category IN ('Books', 'Children', 'Electronics') AND + i_class IN ('personal', 'portable', 'reference', 'self-help') AND + i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', + 'exportiunivamalg #9', 'scholaramalgamalg #9')) + OR + (i_category IN ('Women', 'Music', 'Men') AND + i_class IN ('accessories', 'classical', 'fragrances', 'pants') AND + i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', + 'importoamalg #1'))) + GROUP BY i_manufact_id, d_qoy) tmp1 +WHERE CASE WHEN avg_quarterly_sales > 0 + THEN abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales + ELSE NULL END > 0.1 +ORDER BY avg_quarterly_sales, + sum_sales, + i_manufact_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q54.sql b/core/src/test/resources/tpcds-queries-double/q54.sql new file mode 100755 index 000000000..897237fb6 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q54.sql @@ -0,0 +1,61 @@ +WITH my_customers AS ( + SELECT DISTINCT + c_customer_sk, + c_current_addr_sk + FROM + (SELECT + cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + FROM catalog_sales + UNION ALL + SELECT + ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + FROM 
web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + WHERE sold_date_sk = d_date_sk + AND item_sk = i_item_sk + AND i_category = 'Women' + AND i_class = 'maternity' + AND c_customer_sk = cs_or_ws_sales.customer_sk + AND d_moy = 12 + AND d_year = 1998 +) + , my_revenue AS ( + SELECT + c_customer_sk, + sum(ss_ext_sales_price) AS revenue + FROM my_customers, + store_sales, + customer_address, + store, + date_dim + WHERE c_current_addr_sk = ca_address_sk + AND ca_county = s_county + AND ca_state = s_state + AND ss_sold_date_sk = d_date_sk + AND c_customer_sk = ss_customer_sk + AND d_month_seq BETWEEN (SELECT DISTINCT d_month_seq + 1 + FROM date_dim + WHERE d_year = 1998 AND d_moy = 12) + AND (SELECT DISTINCT d_month_seq + 3 + FROM date_dim + WHERE d_year = 1998 AND d_moy = 12) + GROUP BY c_customer_sk +) + , segments AS +(SELECT cast((revenue / 50) AS INT) AS segment + FROM my_revenue) +SELECT + segment, + count(*) AS num_customers, + segment * 50 AS segment_base +FROM segments +GROUP BY segment +ORDER BY segment, num_customers +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q55.sql b/core/src/test/resources/tpcds-queries-double/q55.sql new file mode 100755 index 000000000..bc5d888c9 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q55.sql @@ -0,0 +1,13 @@ +SELECT + i_brand_id brand_id, + i_brand brand, + sum(ss_ext_sales_price) ext_price +FROM date_dim, store_sales, item +WHERE d_date_sk = ss_sold_date_sk + AND ss_item_sk = i_item_sk + AND i_manager_id = 28 + AND d_moy = 11 + AND d_year = 1999 +GROUP BY i_brand, i_brand_id +ORDER BY ext_price DESC, brand_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q56.sql b/core/src/test/resources/tpcds-queries-double/q56.sql new file mode 100755 index 000000000..2fa1738dc --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q56.sql @@ -0,0 +1,65 @@ +WITH ss AS ( + SELECT + i_item_id, + sum(ss_ext_sales_price) total_sales + FROM + store_sales, 
date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_color IN ('slate', 'blanched', 'burnished')) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 2 + AND ss_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + cs AS ( + SELECT + i_item_id, + sum(cs_ext_sales_price) total_sales + FROM + catalog_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_color IN ('slate', 'blanched', 'burnished')) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 2 + AND cs_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + ws AS ( + SELECT + i_item_id, + sum(ws_ext_sales_price) total_sales + FROM + web_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_color IN ('slate', 'blanched', 'burnished')) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 2 + AND ws_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id) +SELECT + i_item_id, + sum(total_sales) total_sales +FROM (SELECT * + FROM ss + UNION ALL + SELECT * + FROM cs + UNION ALL + SELECT * + FROM ws) tmp1 +GROUP BY i_item_id +ORDER BY total_sales +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q57.sql b/core/src/test/resources/tpcds-queries-double/q57.sql new file mode 100755 index 000000000..cf70d4b90 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q57.sql @@ -0,0 +1,56 @@ +/* q57: v1 = catalog_sales cs_sales_price summed per i_category, i_brand, cc_name, d_year and d_moy (1999 plus the adjacent December and January), with a per-year average and a rank rn ordered by d_year, d_moy; v2 self-joins v1 on the neighbouring rn values to attach the previous (psum) and next (nsum) month sums; the final SELECT keeps 1999 rows deviating from avg_monthly_sales by more than 10 percent. */ WITH v1 AS ( + SELECT + i_category, + i_brand, + cc_name, + d_year, + d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) + OVER + (PARTITION BY i_category, i_brand, cc_name, d_year) + avg_monthly_sales, + rank() + OVER + (PARTITION BY i_category, i_brand, cc_name + ORDER BY d_year, d_moy) rn + FROM item, catalog_sales, date_dim, call_center + WHERE cs_item_sk =
i_item_sk AND + cs_sold_date_sk = d_date_sk AND + cc_call_center_sk = cs_call_center_sk AND + ( + d_year = 1999 OR + (d_year = 1999 - 1 AND d_moy = 12) OR + (d_year = 1999 + 1 AND d_moy = 1) + ) + GROUP BY i_category, i_brand, + cc_name, d_year, d_moy), + v2 AS ( + SELECT + v1.i_category, + v1.i_brand, + v1.cc_name, + v1.d_year, + v1.d_moy, + v1.avg_monthly_sales, + v1.sum_sales, + v1_lag.sum_sales psum, + v1_lead.sum_sales nsum + FROM v1, v1 v1_lag, v1 v1_lead + WHERE v1.i_category = v1_lag.i_category AND + v1.i_category = v1_lead.i_category AND + v1.i_brand = v1_lag.i_brand AND + v1.i_brand = v1_lead.i_brand AND + v1.cc_name = v1_lag.cc_name AND + v1.cc_name = v1_lead.cc_name AND + v1.rn = v1_lag.rn + 1 AND + v1.rn = v1_lead.rn - 1) +SELECT * +FROM v2 +WHERE d_year = 1999 AND + avg_monthly_sales > 0 AND + CASE WHEN avg_monthly_sales > 0 + THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales + ELSE NULL END > 0.1 +ORDER BY sum_sales - avg_monthly_sales, 3 +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q58.sql b/core/src/test/resources/tpcds-queries-double/q58.sql new file mode 100755 index 000000000..5f63f33dc --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q58.sql @@ -0,0 +1,59 @@ +/* q58: ss_items, cs_items and ws_items each total one channel ext_sales_price per i_item_id over the dates that share d_week_seq with d_date 2000-01-03; the final SELECT keeps items whose three channel revenues all lie within 0.9 to 1.1 times one another. */ WITH ss_items AS +(SELECT + i_item_id item_id, + sum(ss_ext_sales_price) ss_item_rev + FROM store_sales, item, date_dim + WHERE ss_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_date = '2000-01-03')) + AND ss_sold_date_sk = d_date_sk + GROUP BY i_item_id), + cs_items AS + (SELECT + i_item_id item_id, + sum(cs_ext_sales_price) cs_item_rev + FROM catalog_sales, item, date_dim + WHERE cs_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_date = '2000-01-03')) + AND cs_sold_date_sk = d_date_sk + GROUP BY i_item_id), + ws_items AS + (SELECT + i_item_id item_id, +
sum(ws_ext_sales_price) ws_item_rev + FROM web_sales, item, date_dim + WHERE ws_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_date = '2000-01-03')) + AND ws_sold_date_sk = d_date_sk + GROUP BY i_item_id) +SELECT + ss_items.item_id, + ss_item_rev, + /* NOTE(review): the ss_dev, cs_dev and ws_dev expressions evaluate left to right as ((rev / total) / 3) * 100, which is not rev divided by the three-channel average; confirm this is intended. */ ss_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ss_dev, + cs_item_rev, + cs_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 cs_dev, + ws_item_rev, + ws_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ws_dev, + (ss_item_rev + cs_item_rev + ws_item_rev) / 3 average +FROM ss_items, cs_items, ws_items +WHERE ss_items.item_id = cs_items.item_id + AND ss_items.item_id = ws_items.item_id + AND ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev + AND ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev + AND cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev + AND cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev + AND ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev + AND ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev +ORDER BY item_id, ss_item_rev +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q59.sql b/core/src/test/resources/tpcds-queries-double/q59.sql new file mode 100755 index 000000000..3cef20276 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q59.sql @@ -0,0 +1,75 @@ +/* q59: wss = store_sales ss_sales_price summed per d_week_seq and ss_store_sk into one column per day name; the final SELECT pairs each store week having d_month_seq 1212 through 1223 (y) with the same store_id week whose d_week_seq is 52 higher and d_month_seq is 1224 through 1235 (x), and returns the per-day ratios of the y sums to the x sums. */ WITH wss AS +(SELECT + d_week_seq, + ss_store_sk, + sum(CASE WHEN (d_day_name = 'Sunday') + THEN ss_sales_price + ELSE NULL END) sun_sales, + sum(CASE WHEN (d_day_name = 'Monday') + THEN ss_sales_price + ELSE NULL END) mon_sales, + sum(CASE WHEN (d_day_name = 'Tuesday') + THEN ss_sales_price + ELSE NULL END) tue_sales, + sum(CASE WHEN (d_day_name = 'Wednesday') + THEN ss_sales_price + ELSE NULL END) wed_sales, + sum(CASE WHEN (d_day_name = 'Thursday') + THEN ss_sales_price + ELSE NULL END) thu_sales, + sum(CASE WHEN (d_day_name
= 'Friday') + THEN ss_sales_price + ELSE NULL END) fri_sales, + sum(CASE WHEN (d_day_name = 'Saturday') + THEN ss_sales_price + ELSE NULL END) sat_sales + FROM store_sales, date_dim + WHERE d_date_sk = ss_sold_date_sk + GROUP BY d_week_seq, ss_store_sk +) +SELECT + s_store_name1, + s_store_id1, + d_week_seq1, + sun_sales1 / sun_sales2, + mon_sales1 / mon_sales2, + tue_sales1 / tue_sales2, + wed_sales1 / wed_sales2, + thu_sales1 / thu_sales2, + fri_sales1 / fri_sales2, + sat_sales1 / sat_sales2 +FROM + (SELECT + s_store_name s_store_name1, + wss.d_week_seq d_week_seq1, + s_store_id s_store_id1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 + FROM wss, store, date_dim d + WHERE d.d_week_seq = wss.d_week_seq AND + ss_store_sk = s_store_sk AND + d_month_seq BETWEEN 1212 AND 1212 + 11) y, + (SELECT + s_store_name s_store_name2, + wss.d_week_seq d_week_seq2, + s_store_id s_store_id2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + FROM wss, store, date_dim d + WHERE d.d_week_seq = wss.d_week_seq AND + ss_store_sk = s_store_sk AND + d_month_seq BETWEEN 1212 + 12 AND 1212 + 23) x +WHERE s_store_id1 = s_store_id2 + AND d_week_seq1 = d_week_seq2 - 52 +ORDER BY s_store_name1, s_store_id1, d_week_seq1 +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q6.sql b/core/src/test/resources/tpcds-queries-double/q6.sql new file mode 100755 index 000000000..f0f5cf05a --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q6.sql @@ -0,0 +1,21 @@ +/* q6: counts joined store_sales rows per customer address state (ca_state) for sales in the d_month_seq of d_year 2000, d_moy 1, of items whose i_current_price exceeds 1.2 times the average i_current_price of their i_category; keeps states with a count of at least 10. */ SELECT + a.ca_state state, + count(*) cnt +FROM + customer_address a, customer c, store_sales s, date_dim d, item i +WHERE a.ca_address_sk = c.c_current_addr_sk + AND c.c_customer_sk = s.ss_customer_sk + AND s.ss_sold_date_sk = d.d_date_sk + AND s.ss_item_sk = i.i_item_sk + AND
d.d_month_seq = + (SELECT DISTINCT (d_month_seq) + FROM date_dim + WHERE d_year = 2000 AND d_moy = 1) + AND i.i_current_price > 1.2 * + (SELECT avg(j.i_current_price) + FROM item j + WHERE j.i_category = i.i_category) +GROUP BY a.ca_state +HAVING count(*) >= 10 +ORDER BY cnt +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q60.sql b/core/src/test/resources/tpcds-queries-double/q60.sql new file mode 100755 index 000000000..41b963f44 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q60.sql @@ -0,0 +1,62 @@ +/* q60: three CTEs (ss, cs, ws) each total one sales channel ext_sales_price per i_item_id for d_year 1998, d_moy 9, ca_gmt_offset -5 and items whose i_item_id appears among items with i_category Music; the final SELECT unions the three and sums total_sales per i_item_id. */ WITH ss AS ( + SELECT + i_item_id, + sum(ss_ext_sales_price) total_sales + FROM store_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_category IN ('Music')) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 9 + AND ss_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + cs AS ( + SELECT + i_item_id, + sum(cs_ext_sales_price) total_sales + FROM catalog_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_category IN ('Music')) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 9 + AND cs_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + ws AS ( + SELECT + i_item_id, + sum(ws_ext_sales_price) total_sales + FROM web_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_category IN ('Music')) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 9 + AND ws_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id) +SELECT + i_item_id, + sum(total_sales) total_sales +FROM (SELECT * + FROM ss + UNION ALL + SELECT * + FROM cs + UNION ALL + SELECT * + FROM ws) tmp1 +GROUP BY i_item_id +ORDER BY i_item_id, total_sales +LIMIT 100 diff --git
a/core/src/test/resources/tpcds-queries-double/q61.sql b/core/src/test/resources/tpcds-queries-double/q61.sql new file mode 100755 index 000000000..79e5d975c --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q61.sql @@ -0,0 +1,33 @@ +/* q61: promotional_sales = total store_sales ss_ext_sales_price for i_category Jewelry in d_year 1998, d_moy 11 with ca_gmt_offset and s_gmt_offset of -5 and a promotion flagged Y on the dmail, email or tv channel; all_sales = the same total without the promotion join; returns both totals and promotions as a percentage of total. */ SELECT + promotions, + total, + cast(promotions AS DOUBLE) / cast(total AS DOUBLE) * 100 +FROM + (SELECT sum(ss_ext_sales_price) promotions + FROM store_sales, store, promotion, date_dim, customer, customer_address, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_store_sk = s_store_sk + AND ss_promo_sk = p_promo_sk + AND ss_customer_sk = c_customer_sk + AND ca_address_sk = c_current_addr_sk + AND ss_item_sk = i_item_sk + AND ca_gmt_offset = -5 + AND i_category = 'Jewelry' + AND (p_channel_dmail = 'Y' OR p_channel_email = 'Y' OR p_channel_tv = 'Y') + AND s_gmt_offset = -5 + AND d_year = 1998 + AND d_moy = 11) promotional_sales, + (SELECT sum(ss_ext_sales_price) total + FROM store_sales, store, date_dim, customer, customer_address, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_store_sk = s_store_sk + AND ss_customer_sk = c_customer_sk + AND ca_address_sk = c_current_addr_sk + AND ss_item_sk = i_item_sk + AND ca_gmt_offset = -5 + AND i_category = 'Jewelry' + AND s_gmt_offset = -5 + AND d_year = 1998 + AND d_moy = 11) all_sales +ORDER BY promotions, total +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q62.sql b/core/src/test/resources/tpcds-queries-double/q62.sql new file mode 100755 index 000000000..8a414f154 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q62.sql @@ -0,0 +1,35 @@ +/* q62: for web_sales whose ship date falls in d_month_seq 1200 through 1211, counts rows per (first 20 characters of w_warehouse_name, sm_type, web_name) into buckets by ws_ship_date_sk minus ws_sold_date_sk: up to 30, 31 to 60, 61 to 90, 91 to 120, and over 120. */ SELECT + substr(w_warehouse_name, 1, 20), + sm_type, + web_name, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk <= 30) + THEN 1 + ELSE 0 END) AS `30 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 30) AND + (ws_ship_date_sk - ws_sold_date_sk <= 60) + THEN 1 + ELSE 0 END) AS `31 - 60 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 60) AND + (ws_ship_date_sk -
ws_sold_date_sk <= 90) + THEN 1 + ELSE 0 END) AS `61 - 90 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 90) AND + (ws_ship_date_sk - ws_sold_date_sk <= 120) + THEN 1 + ELSE 0 END) AS `91 - 120 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 120) + THEN 1 + ELSE 0 END) AS `>120 days ` +FROM + web_sales, warehouse, ship_mode, web_site, date_dim +WHERE + d_month_seq BETWEEN 1200 AND 1200 + 11 + AND ws_ship_date_sk = d_date_sk + AND ws_warehouse_sk = w_warehouse_sk + AND ws_ship_mode_sk = sm_ship_mode_sk + AND ws_web_site_sk = web_site_sk +GROUP BY + substr(w_warehouse_name, 1, 20), sm_type, web_name +ORDER BY + substr(w_warehouse_name, 1, 20), sm_type, web_name +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q63.sql b/core/src/test/resources/tpcds-queries-double/q63.sql new file mode 100755 index 000000000..ef6867e0a --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q63.sql @@ -0,0 +1,31 @@ +/* q63: per i_manager_id and d_moy group, sums ss_sales_price over d_month_seq 1200 through 1211 for the listed category/class/brand combinations and keeps groups whose sum differs from the manager-wide average of those sums by more than 10 percent. NOTE(review): the first i_class list below contains the literal refernece (so spelled, whereas q53 in this patch spells it reference); confirm against the data set and the upstream query text before changing it. */ SELECT * +FROM (SELECT + i_manager_id, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER (PARTITION BY i_manager_id) avg_monthly_sales +FROM item + , store_sales + , date_dim + , store +WHERE ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND ss_store_sk = s_store_sk + AND d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, + 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) + AND ((i_category IN ('Books', 'Children', 'Electronics') + AND i_class IN ('personal', 'portable', 'refernece', 'self-help') + AND i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', + 'exportiunivamalg #9', 'scholaramalgamalg #9')) + OR (i_category IN ('Women', 'Music', 'Men') + AND i_class IN ('accessories', 'classical', 'fragrances', 'pants') + AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', + 'importoamalg #1'))) +GROUP BY i_manager_id, d_moy) tmp1 +WHERE CASE WHEN avg_monthly_sales > 0 + THEN abs(sum_sales - avg_monthly_sales) /
avg_monthly_sales + ELSE NULL END > 0.1 +ORDER BY i_manager_id + , avg_monthly_sales + , sum_sales +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q64.sql b/core/src/test/resources/tpcds-queries-double/q64.sql new file mode 100755 index 000000000..8ec1d31b6 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q64.sql @@ -0,0 +1,92 @@ +/* q64: cs_ui = catalog items whose summed cs_ext_list_price exceeds twice their summed refunds (cr_refunded_cash + cr_reversed_charge + cr_store_credit); cross_sales = store sales that have a matching store_returns row, for those items, aggregated per product, store, purchase and current address, and the three d_year values; the final SELECT pairs each item and store row with syear 1999 with its syear 2000 row where the later cnt does not exceed the earlier one. The query has no LIMIT clause. */ WITH cs_ui AS +(SELECT + cs_item_sk, + sum(cs_ext_list_price) AS sale, + sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit) AS refund + FROM catalog_sales + , catalog_returns + WHERE cs_item_sk = cr_item_sk + AND cs_order_number = cr_order_number + GROUP BY cs_item_sk + HAVING sum(cs_ext_list_price) > 2 * sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit)), + cross_sales AS + (SELECT + i_product_name product_name, + i_item_sk item_sk, + s_store_name store_name, + s_zip store_zip, + ad1.ca_street_number b_street_number, + ad1.ca_street_name b_streen_name, + ad1.ca_city b_city, + ad1.ca_zip b_zip, + ad2.ca_street_number c_street_number, + ad2.ca_street_name c_street_name, + ad2.ca_city c_city, + ad2.ca_zip c_zip, + d1.d_year AS syear, + d2.d_year AS fsyear, + d3.d_year s2year, + count(*) cnt, + sum(ss_wholesale_cost) s1, + sum(ss_list_price) s2, + sum(ss_coupon_amt) s3 + FROM store_sales, store_returns, cs_ui, date_dim d1, date_dim d2, date_dim d3, + store, customer, customer_demographics cd1, customer_demographics cd2, + promotion, household_demographics hd1, household_demographics hd2, + customer_address ad1, customer_address ad2, income_band ib1, income_band ib2, item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk = cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk AND + ss_item_sk = i_item_sk AND + ss_item_sk = sr_item_sk AND + ss_ticket_number = sr_ticket_number AND + ss_item_sk = cs_ui.cs_item_sk AND + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk =
hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk AND + c_first_sales_date_sk = d2.d_date_sk AND + c_first_shipto_date_sk = d3.d_date_sk AND + ss_promo_sk = p_promo_sk AND + hd1.hd_income_band_sk = ib1.ib_income_band_sk AND + hd2.hd_income_band_sk = ib2.ib_income_band_sk AND + cd1.cd_marital_status <> cd2.cd_marital_status AND + i_color IN ('purple', 'burlywood', 'indian', 'spring', 'floral', 'medium') AND + /* both i_current_price ranges below are ANDed, so the effective range is 65 through 74 */ i_current_price BETWEEN 64 AND 64 + 10 AND + i_current_price BETWEEN 64 + 1 AND 64 + 15 + GROUP BY i_product_name, i_item_sk, s_store_name, s_zip, ad1.ca_street_number, + ad1.ca_street_name, ad1.ca_city, ad1.ca_zip, ad2.ca_street_number, + ad2.ca_street_name, ad2.ca_city, ad2.ca_zip, d1.d_year, d2.d_year, d3.d_year + ) +SELECT + cs1.product_name, + cs1.store_name, + cs1.store_zip, + cs1.b_street_number, + cs1.b_streen_name, + cs1.b_city, + cs1.b_zip, + cs1.c_street_number, + cs1.c_street_name, + cs1.c_city, + cs1.c_zip, + cs1.syear, + cs1.cnt, + cs1.s1, + cs1.s2, + cs1.s3, + cs2.s1, + cs2.s2, + cs2.s3, + cs2.syear, + cs2.cnt +FROM cross_sales cs1, cross_sales cs2 +WHERE cs1.item_sk = cs2.item_sk AND + cs1.syear = 1999 AND + cs2.syear = 1999 + 1 AND + cs2.cnt <= cs1.cnt AND + cs1.store_name = cs2.store_name AND + cs1.store_zip = cs2.store_zip +ORDER BY cs1.product_name, cs1.store_name, cs2.cnt diff --git a/core/src/test/resources/tpcds-queries-double/q65.sql b/core/src/test/resources/tpcds-queries-double/q65.sql new file mode 100755 index 000000000..aad04be1b --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q65.sql @@ -0,0 +1,33 @@ +/* q65: sc = ss_sales_price revenue per store and item for d_month_seq 1176 through 1187; sb = per-store average of those per-item revenues; returns store and item details for items whose revenue is at most 0.1 times the average for their store. */ SELECT + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand +FROM store, item, + (SELECT + ss_store_sk, + avg(revenue) AS ave + FROM + (SELECT + ss_store_sk, + ss_item_sk, + sum(ss_sales_price) AS revenue + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 + GROUP BY ss_store_sk, ss_item_sk) sa + GROUP BY
ss_store_sk) sb, + (SELECT + ss_store_sk, + ss_item_sk, + sum(ss_sales_price) AS revenue + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 + GROUP BY ss_store_sk, ss_item_sk) sc +WHERE sb.ss_store_sk = sc.ss_store_sk AND + sc.revenue <= 0.1 * sb.ave AND + s_store_sk = sc.ss_store_sk AND + i_item_sk = sc.ss_item_sk +ORDER BY s_store_name, i_item_desc +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q66.sql b/core/src/test/resources/tpcds-queries-double/q66.sql new file mode 100755 index 000000000..f826b4164 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q66.sql @@ -0,0 +1,240 @@ +/* q66: per warehouse, carrier label and year, sums monthly sales and net amounts (plus sales divided by w_warehouse_sq_ft) over the UNION ALL of a web_sales branch and a catalog_sales branch, each limited to d_year 2001, t_time 30838 through 30838 + 28800 and sm_carrier DHL or BARIAN. */ SELECT + w_warehouse_name, + w_warehouse_sq_ft, + w_city, + w_county, + w_state, + w_country, + ship_carriers, + year, + sum(jan_sales) AS jan_sales, + sum(feb_sales) AS feb_sales, + sum(mar_sales) AS mar_sales, + sum(apr_sales) AS apr_sales, + sum(may_sales) AS may_sales, + sum(jun_sales) AS jun_sales, + sum(jul_sales) AS jul_sales, + sum(aug_sales) AS aug_sales, + sum(sep_sales) AS sep_sales, + sum(oct_sales) AS oct_sales, + sum(nov_sales) AS nov_sales, + sum(dec_sales) AS dec_sales, + sum(jan_sales / w_warehouse_sq_ft) AS jan_sales_per_sq_foot, + sum(feb_sales / w_warehouse_sq_ft) AS feb_sales_per_sq_foot, + sum(mar_sales / w_warehouse_sq_ft) AS mar_sales_per_sq_foot, + sum(apr_sales / w_warehouse_sq_ft) AS apr_sales_per_sq_foot, + sum(may_sales / w_warehouse_sq_ft) AS may_sales_per_sq_foot, + sum(jun_sales / w_warehouse_sq_ft) AS jun_sales_per_sq_foot, + sum(jul_sales / w_warehouse_sq_ft) AS jul_sales_per_sq_foot, + sum(aug_sales / w_warehouse_sq_ft) AS aug_sales_per_sq_foot, + sum(sep_sales / w_warehouse_sq_ft) AS sep_sales_per_sq_foot, + sum(oct_sales / w_warehouse_sq_ft) AS oct_sales_per_sq_foot, + sum(nov_sales / w_warehouse_sq_ft) AS nov_sales_per_sq_foot, + sum(dec_sales / w_warehouse_sq_ft) AS dec_sales_per_sq_foot, + sum(jan_net) AS jan_net, + sum(feb_net) AS feb_net, + sum(mar_net)
AS mar_net, + sum(apr_net) AS apr_net, + sum(may_net) AS may_net, + sum(jun_net) AS jun_net, + sum(jul_net) AS jul_net, + sum(aug_net) AS aug_net, + sum(sep_net) AS sep_net, + sum(oct_net) AS oct_net, + sum(nov_net) AS nov_net, + sum(dec_net) AS dec_net +FROM ( + /* web_sales branch: per-month sums of ws_ext_sales_price times ws_quantity (the sales columns) and ws_net_paid times ws_quantity (the net columns), selected by d_moy */ (SELECT + w_warehouse_name, + w_warehouse_sq_ft, + w_city, + w_county, + w_state, + w_country, + concat('DHL', ',', 'BARIAN') AS ship_carriers, + d_year AS year, + sum(CASE WHEN d_moy = 1 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS jan_sales, + sum(CASE WHEN d_moy = 2 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS feb_sales, + sum(CASE WHEN d_moy = 3 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS mar_sales, + sum(CASE WHEN d_moy = 4 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS apr_sales, + sum(CASE WHEN d_moy = 5 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS may_sales, + sum(CASE WHEN d_moy = 6 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS jun_sales, + sum(CASE WHEN d_moy = 7 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS jul_sales, + sum(CASE WHEN d_moy = 8 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS aug_sales, + sum(CASE WHEN d_moy = 9 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS sep_sales, + sum(CASE WHEN d_moy = 10 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS oct_sales, + sum(CASE WHEN d_moy = 11 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS nov_sales, + sum(CASE WHEN d_moy = 12 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS dec_sales, + sum(CASE WHEN d_moy = 1 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS jan_net, + sum(CASE WHEN d_moy = 2 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS feb_net, + sum(CASE WHEN d_moy = 3 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS mar_net, + sum(CASE WHEN d_moy = 4 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS apr_net, + sum(CASE WHEN d_moy = 5 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS may_net, + sum(CASE WHEN
d_moy = 6 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS jun_net, + sum(CASE WHEN d_moy = 7 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS jul_net, + sum(CASE WHEN d_moy = 8 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS aug_net, + sum(CASE WHEN d_moy = 9 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS sep_net, + sum(CASE WHEN d_moy = 10 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS oct_net, + sum(CASE WHEN d_moy = 11 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS nov_net, + sum(CASE WHEN d_moy = 12 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS dec_net + FROM + web_sales, warehouse, date_dim, time_dim, ship_mode + WHERE + ws_warehouse_sk = w_warehouse_sk + AND ws_sold_date_sk = d_date_sk + AND ws_sold_time_sk = t_time_sk + AND ws_ship_mode_sk = sm_ship_mode_sk + AND d_year = 2001 + AND t_time BETWEEN 30838 AND 30838 + 28800 + AND sm_carrier IN ('DHL', 'BARIAN') + GROUP BY + w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year) + UNION ALL + /* catalog_sales branch: per-month sums of cs_sales_price times cs_quantity (the sales columns) and cs_net_paid_inc_tax times cs_quantity (the net columns), selected by d_moy */ (SELECT + w_warehouse_name, + w_warehouse_sq_ft, + w_city, + w_county, + w_state, + w_country, + concat('DHL', ',', 'BARIAN') AS ship_carriers, + d_year AS year, + sum(CASE WHEN d_moy = 1 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS jan_sales, + sum(CASE WHEN d_moy = 2 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS feb_sales, + sum(CASE WHEN d_moy = 3 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS mar_sales, + sum(CASE WHEN d_moy = 4 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS apr_sales, + sum(CASE WHEN d_moy = 5 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS may_sales, + sum(CASE WHEN d_moy = 6 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS jun_sales, + sum(CASE WHEN d_moy = 7 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS jul_sales, + sum(CASE WHEN d_moy = 8 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS aug_sales, + sum(CASE WHEN d_moy = 9 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS sep_sales, + sum(CASE WHEN d_moy
= 10 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS oct_sales, + sum(CASE WHEN d_moy = 11 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS nov_sales, + sum(CASE WHEN d_moy = 12 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS dec_sales, + sum(CASE WHEN d_moy = 1 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS jan_net, + sum(CASE WHEN d_moy = 2 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS feb_net, + sum(CASE WHEN d_moy = 3 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS mar_net, + sum(CASE WHEN d_moy = 4 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS apr_net, + sum(CASE WHEN d_moy = 5 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS may_net, + sum(CASE WHEN d_moy = 6 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS jun_net, + sum(CASE WHEN d_moy = 7 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS jul_net, + sum(CASE WHEN d_moy = 8 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS aug_net, + sum(CASE WHEN d_moy = 9 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS sep_net, + sum(CASE WHEN d_moy = 10 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS oct_net, + sum(CASE WHEN d_moy = 11 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS nov_net, + sum(CASE WHEN d_moy = 12 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS dec_net + FROM + catalog_sales, warehouse, date_dim, time_dim, ship_mode + WHERE + cs_warehouse_sk = w_warehouse_sk + AND cs_sold_date_sk = d_date_sk + AND cs_sold_time_sk = t_time_sk + AND cs_ship_mode_sk = sm_ship_mode_sk + AND d_year = 2001 + AND t_time BETWEEN 30838 AND 30838 + 28800 + AND sm_carrier IN ('DHL', 'BARIAN') + GROUP BY + w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year + ) + ) x +/* merges the web and catalog rows that share the same warehouse columns, carrier label and year */ GROUP BY + w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, + ship_carriers, year +ORDER BY w_warehouse_name +LIMIT 100 diff --git
a/core/src/test/resources/tpcds-queries-double/q67.sql b/core/src/test/resources/tpcds-queries-double/q67.sql new file mode 100755 index 000000000..f66e2252b --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q67.sql @@ -0,0 +1,38 @@ +/* q67: dw1 = sum of coalesce(ss_sales_price times ss_quantity, 0) for d_month_seq 1200 through 1211 grouped by ROLLUP over category, class, brand, product name, year, quarter, month and store id; dw2 ranks those rows by sumsales descending within i_category; returns rows with rk of at most 100. */ SELECT * +FROM + (SELECT + i_category, + i_class, + i_brand, + i_product_name, + d_year, + d_qoy, + d_moy, + s_store_id, + sumsales, + rank() + OVER (PARTITION BY i_category + ORDER BY sumsales DESC) rk + FROM + (SELECT + i_category, + i_class, + i_brand, + i_product_name, + d_year, + d_qoy, + d_moy, + s_store_id, + sum(coalesce(ss_sales_price * ss_quantity, 0)) sumsales + FROM store_sales, date_dim, store, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + AND ss_store_sk = s_store_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + GROUP BY ROLLUP (i_category, i_class, i_brand, i_product_name, d_year, d_qoy, + d_moy, s_store_id)) dw1) dw2 +WHERE rk <= 100 +ORDER BY + i_category, i_class, i_brand, i_product_name, d_year, + d_qoy, d_moy, s_store_id, sumsales, rk +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q68.sql b/core/src/test/resources/tpcds-queries-double/q68.sql new file mode 100755 index 000000000..adb8a7189 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q68.sql @@ -0,0 +1,34 @@ +/* q68: dn = per ticket, customer, address and city sums of ss_ext_sales_price, ss_ext_list_price and ss_ext_tax for sales with d_dom between 1 and 2 in 1999 through 2001 at stores in Midway or Fairview, by households with hd_dep_count 4 or hd_vehicle_count 3; returns those tickets whose purchase-address city differs from the customer current-address city. */ SELECT + c_last_name, + c_first_name, + ca_city, + bought_city, + ss_ticket_number, + extended_price, + extended_tax, + list_price +FROM (SELECT + ss_ticket_number, + ss_customer_sk, + ca_city bought_city, + sum(ss_ext_sales_price) extended_price, + sum(ss_ext_list_price) list_price, + sum(ss_ext_tax) extended_tax +FROM store_sales, date_dim, store, household_demographics, customer_address +WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND store_sales.ss_addr_sk = customer_address.ca_address_sk + AND date_dim.d_dom BETWEEN 1 AND 2 + AND
(household_demographics.hd_dep_count = 4 OR + household_demographics.hd_vehicle_count = 3) + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_city IN ('Midway', 'Fairview') +GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, + customer, + customer_address current_addr +WHERE ss_customer_sk = c_customer_sk + AND customer.c_current_addr_sk = current_addr.ca_address_sk + AND current_addr.ca_city <> bought_city +ORDER BY c_last_name, ss_ticket_number +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q69.sql b/core/src/test/resources/tpcds-queries-double/q69.sql new file mode 100755 index 000000000..1f0ee64f5 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q69.sql @@ -0,0 +1,38 @@ +/* q69: counts customers per demographic combination (gender, marital status, education status, purchase estimate, credit rating) among those with a current address in KY, GA or NM who have a store_sales row in d_year 2001, d_moy 4 through 6, and no web_sales or catalog_sales row in that same period. */ SELECT + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 +FROM + customer c, customer_address ca, customer_demographics +WHERE + c.c_current_addr_sk = ca.ca_address_sk AND + ca_state IN ('KY', 'GA', 'NM') AND + cd_demo_sk = c.c_current_cdemo_sk AND + exists(SELECT * + FROM store_sales, date_dim + WHERE c.c_customer_sk = ss_customer_sk AND + ss_sold_date_sk = d_date_sk AND + d_year = 2001 AND + d_moy BETWEEN 4 AND 4 + 2) AND + (NOT exists(SELECT * + FROM web_sales, date_dim + WHERE c.c_customer_sk = ws_bill_customer_sk AND + ws_sold_date_sk = d_date_sk AND + d_year = 2001 AND + d_moy BETWEEN 4 AND 4 + 2) AND + NOT exists(SELECT * + FROM catalog_sales, date_dim + WHERE c.c_customer_sk = cs_ship_customer_sk AND + cs_sold_date_sk = d_date_sk AND + d_year = 2001 AND + d_moy BETWEEN 4 AND 4 + 2)) +GROUP BY cd_gender, cd_marital_status, cd_education_status, + cd_purchase_estimate, cd_credit_rating +ORDER BY cd_gender, cd_marital_status, cd_education_status, + cd_purchase_estimate, cd_credit_rating +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q7.sql b/core/src/test/resources/tpcds-queries-double/q7.sql new file
mode 100755 index 000000000..6630a0054 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q7.sql @@ -0,0 +1,19 @@ +/* q7: per i_item_id averages of ss_quantity, ss_list_price, ss_coupon_amt and ss_sales_price for d_year 2000 store sales to customers with cd_gender M, cd_marital_status S and cd_education_status College, under promotions with p_channel_email N or p_channel_event N. */ SELECT + i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 +FROM store_sales, customer_demographics, date_dim, item, promotion +WHERE ss_sold_date_sk = d_date_sk AND + ss_item_sk = i_item_sk AND + ss_cdemo_sk = cd_demo_sk AND + ss_promo_sk = p_promo_sk AND + cd_gender = 'M' AND + cd_marital_status = 'S' AND + cd_education_status = 'College' AND + (p_channel_email = 'N' OR p_channel_event = 'N') AND + d_year = 2000 +GROUP BY i_item_id +ORDER BY i_item_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q70.sql b/core/src/test/resources/tpcds-queries-double/q70.sql new file mode 100755 index 000000000..625011b21 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q70.sql @@ -0,0 +1,38 @@ +/* q70: sums ss_net_profit for d_month_seq 1200 through 1211 with ROLLUP over (s_state, s_county), ranking the totals within each hierarchy level and, at county level, within each state. NOTE(review): the inner rank() is partitioned by s_state while that subquery also groups by s_state, so each partition appears to hold a single row and the ranking <= 5 filter would not exclude any state; confirm. */ SELECT + sum(ss_net_profit) AS total_sum, + s_state, + s_county, + grouping(s_state) + grouping(s_county) AS lochierarchy, + rank() + OVER ( + PARTITION BY grouping(s_state) + grouping(s_county), + CASE WHEN grouping(s_county) = 0 + THEN s_state END + ORDER BY sum(ss_net_profit) DESC) AS rank_within_parent +FROM + store_sales, date_dim d1, store +WHERE + d1.d_month_seq BETWEEN 1200 AND 1200 + 11 + AND d1.d_date_sk = ss_sold_date_sk + AND s_store_sk = ss_store_sk + AND s_state IN + (SELECT s_state + FROM + (SELECT + s_state AS s_state, + rank() + OVER (PARTITION BY s_state + ORDER BY sum(ss_net_profit) DESC) AS ranking + FROM store_sales, store, date_dim + WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 + AND d_date_sk = ss_sold_date_sk + AND s_store_sk = ss_store_sk + GROUP BY s_state) tmp1 + WHERE ranking <= 5) +GROUP BY ROLLUP (s_state, s_county) +ORDER BY + lochierarchy DESC + , CASE WHEN lochierarchy = 0 + THEN s_state END + , rank_within_parent +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q71.sql
b/core/src/test/resources/tpcds-queries-double/q71.sql new file mode 100755 index 000000000..8d724b924 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q71.sql @@ -0,0 +1,44 @@ +/* q71: sums ext_price per brand, t_hour and t_minute over the UNION ALL of web, catalog and store sales from d_moy 11 of d_year 1999, for items with i_manager_id 1 sold at a t_meal_time of breakfast or dinner. The query has no LIMIT clause. */ SELECT + i_brand_id brand_id, + i_brand brand, + t_hour, + t_minute, + sum(ext_price) ext_price +FROM item, + (SELECT + ws_ext_sales_price AS ext_price, + ws_sold_date_sk AS sold_date_sk, + ws_item_sk AS sold_item_sk, + ws_sold_time_sk AS time_sk + FROM web_sales, date_dim + WHERE d_date_sk = ws_sold_date_sk + AND d_moy = 11 + AND d_year = 1999 + UNION ALL + SELECT + cs_ext_sales_price AS ext_price, + cs_sold_date_sk AS sold_date_sk, + cs_item_sk AS sold_item_sk, + cs_sold_time_sk AS time_sk + FROM catalog_sales, date_dim + WHERE d_date_sk = cs_sold_date_sk + AND d_moy = 11 + AND d_year = 1999 + UNION ALL + SELECT + ss_ext_sales_price AS ext_price, + ss_sold_date_sk AS sold_date_sk, + ss_item_sk AS sold_item_sk, + ss_sold_time_sk AS time_sk + FROM store_sales, date_dim + WHERE d_date_sk = ss_sold_date_sk + AND d_moy = 11 + AND d_year = 1999 + ) AS tmp, time_dim +WHERE + sold_item_sk = i_item_sk + AND i_manager_id = 1 + AND time_sk = t_time_sk + AND (t_meal_time = 'breakfast' OR t_meal_time = 'dinner') +GROUP BY i_brand, i_brand_id, t_hour, t_minute +ORDER BY ext_price DESC, brand_id diff --git a/core/src/test/resources/tpcds-queries-double/q72.sql b/core/src/test/resources/tpcds-queries-double/q72.sql new file mode 100755 index 000000000..99b3eee54 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q72.sql @@ -0,0 +1,33 @@ +/* q72: counts catalog_sales rows per item description, warehouse and sold week where inventory recorded in the same d_week_seq is below the ordered quantity and the ship date is more than 5 days after the sold date, for d_year 1999 sales with cd_marital_status D and hd_buy_potential >10000. NOTE(review): the no_promo and promo columns apply count() to a CASE that yields 1 or 0 and never NULL, so as written each counts every row of the group; confirm whether sum() was intended. */ SELECT + i_item_desc, + w_warehouse_name, + d1.d_week_seq, + count(CASE WHEN p_promo_sk IS NULL + THEN 1 + ELSE 0 END) no_promo, + count(CASE WHEN p_promo_sk IS NOT NULL + THEN 1 + ELSE 0 END) promo, + count(*) total_cnt +FROM catalog_sales + JOIN inventory ON (cs_item_sk = inv_item_sk) + JOIN warehouse ON (w_warehouse_sk = inv_warehouse_sk) + JOIN item ON (i_item_sk = cs_item_sk) + JOIN customer_demographics ON (cs_bill_cdemo_sk =
cd_demo_sk) + JOIN household_demographics ON (cs_bill_hdemo_sk = hd_demo_sk) + JOIN date_dim d1 ON (cs_sold_date_sk = d1.d_date_sk) + JOIN date_dim d2 ON (inv_date_sk = d2.d_date_sk) + JOIN date_dim d3 ON (cs_ship_date_sk = d3.d_date_sk) + LEFT OUTER JOIN promotion ON (cs_promo_sk = p_promo_sk) + LEFT OUTER JOIN catalog_returns ON (cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number) +WHERE d1.d_week_seq = d2.d_week_seq + AND inv_quantity_on_hand < cs_quantity + AND d3.d_date > (cast(d1.d_date AS DATE) + interval 5 days) + AND hd_buy_potential = '>10000' + AND d1.d_year = 1999 + AND hd_buy_potential = '>10000' + AND cd_marital_status = 'D' + AND d1.d_year = 1999 +GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq +ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q73.sql b/core/src/test/resources/tpcds-queries-double/q73.sql new file mode 100755 index 000000000..881be2e90 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q73.sql @@ -0,0 +1,30 @@ +SELECT + c_last_name, + c_first_name, + c_salutation, + c_preferred_cust_flag, + ss_ticket_number, + cnt +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + count(*) cnt + FROM store_sales, date_dim, store, household_demographics + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND date_dim.d_dom BETWEEN 1 AND 2 + AND (household_demographics.hd_buy_potential = '>10000' OR + household_demographics.hd_buy_potential = 'unknown') + AND household_demographics.hd_vehicle_count > 0 + AND CASE WHEN household_demographics.hd_vehicle_count > 0 + THEN + household_demographics.hd_dep_count / household_demographics.hd_vehicle_count + ELSE NULL END > 1 + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_county IN ('Williamson County', 'Franklin Parish', 'Bronx County', 'Orange 
County') + GROUP BY ss_ticket_number, ss_customer_sk) dj, customer +WHERE ss_customer_sk = c_customer_sk + AND cnt BETWEEN 1 AND 5 +ORDER BY cnt DESC diff --git a/core/src/test/resources/tpcds-queries-double/q74.sql b/core/src/test/resources/tpcds-queries-double/q74.sql new file mode 100755 index 000000000..154b26d68 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q74.sql @@ -0,0 +1,58 @@ +WITH year_total AS ( + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + d_year AS year, + sum(ss_net_paid) year_total, + 's' sale_type + FROM + customer, store_sales, date_dim + WHERE c_customer_sk = ss_customer_sk + AND ss_sold_date_sk = d_date_sk + AND d_year IN (2001, 2001 + 1) + GROUP BY + c_customer_id, c_first_name, c_last_name, d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + d_year AS year, + sum(ws_net_paid) year_total, + 'w' sale_type + FROM + customer, web_sales, date_dim + WHERE c_customer_sk = ws_bill_customer_sk + AND ws_sold_date_sk = d_date_sk + AND d_year IN (2001, 2001 + 1) + GROUP BY + c_customer_id, c_first_name, c_last_name, d_year) +SELECT + t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name +FROM + year_total t_s_firstyear, year_total t_s_secyear, + year_total t_w_firstyear, year_total t_w_secyear +WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_secyear.customer_id + AND t_s_firstyear.customer_id = t_w_firstyear.customer_id + AND t_s_firstyear.sale_type = 's' + AND t_w_firstyear.sale_type = 'w' + AND t_s_secyear.sale_type = 's' + AND t_w_secyear.sale_type = 'w' + AND t_s_firstyear.year = 2001 + AND t_s_secyear.year = 2001 + 1 + AND t_w_firstyear.year = 2001 + AND t_w_secyear.year = 2001 + 1 + AND t_s_firstyear.year_total > 0 + AND t_w_firstyear.year_total > 0 + AND CASE WHEN t_w_firstyear.year_total > 0 + THEN 
t_w_secyear.year_total / t_w_firstyear.year_total + ELSE NULL END + > CASE WHEN t_s_firstyear.year_total > 0 + THEN t_s_secyear.year_total / t_s_firstyear.year_total + ELSE NULL END +ORDER BY 1, 1, 1 +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q75.sql b/core/src/test/resources/tpcds-queries-double/q75.sql new file mode 100755 index 000000000..3f7b67926 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q75.sql @@ -0,0 +1,76 @@ +WITH all_sales AS ( + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + SUM(sales_cnt) AS sales_cnt, + SUM(sales_amt) AS sales_amt + FROM ( + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + cs_quantity - COALESCE(cr_return_quantity, 0) AS sales_cnt, + cs_ext_sales_price - COALESCE(cr_return_amount, 0.0) AS sales_amt + FROM catalog_sales + JOIN item ON i_item_sk = cs_item_sk + JOIN date_dim ON d_date_sk = cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number = cr_order_number + AND cs_item_sk = cr_item_sk) + WHERE i_category = 'Books' + UNION + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + ss_quantity - COALESCE(sr_return_quantity, 0) AS sales_cnt, + ss_ext_sales_price - COALESCE(sr_return_amt, 0.0) AS sales_amt + FROM store_sales + JOIN item ON i_item_sk = ss_item_sk + JOIN date_dim ON d_date_sk = ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk) + WHERE i_category = 'Books' + UNION + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + ws_quantity - COALESCE(wr_return_quantity, 0) AS sales_cnt, + ws_ext_sales_price - COALESCE(wr_return_amt, 0.0) AS sales_amt + FROM web_sales + JOIN item ON i_item_sk = ws_item_sk + JOIN date_dim ON d_date_sk = ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number = wr_order_number + AND ws_item_sk = wr_item_sk) + WHERE i_category = 'Books') sales_detail + GROUP BY 
d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) +SELECT + prev_yr.d_year AS prev_year, + curr_yr.d_year AS year, + curr_yr.i_brand_id, + curr_yr.i_class_id, + curr_yr.i_category_id, + curr_yr.i_manufact_id, + prev_yr.sales_cnt AS prev_yr_cnt, + curr_yr.sales_cnt AS curr_yr_cnt, + curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff, + curr_yr.sales_amt - prev_yr.sales_amt AS sales_amt_diff +FROM all_sales curr_yr, all_sales prev_yr +WHERE curr_yr.i_brand_id = prev_yr.i_brand_id + AND curr_yr.i_class_id = prev_yr.i_class_id + AND curr_yr.i_category_id = prev_yr.i_category_id + AND curr_yr.i_manufact_id = prev_yr.i_manufact_id + AND curr_yr.d_year = 2002 + AND prev_yr.d_year = 2002 - 1 + AND CAST(curr_yr.sales_cnt AS DOUBLE) / CAST(prev_yr.sales_cnt AS DOUBLE) < 0.9 +ORDER BY sales_cnt_diff +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q76.sql b/core/src/test/resources/tpcds-queries-double/q76.sql new file mode 100755 index 000000000..815fa922b --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q76.sql @@ -0,0 +1,47 @@ +SELECT + channel, + col_name, + d_year, + d_qoy, + i_category, + COUNT(*) sales_cnt, + SUM(ext_sales_price) sales_amt +FROM ( + SELECT + 'store' AS channel, + ss_store_sk col_name, + d_year, + d_qoy, + i_category, + ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_store_sk IS NULL + AND ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + UNION ALL + SELECT + 'web' AS channel, + ws_ship_customer_sk col_name, + d_year, + d_qoy, + i_category, + ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_customer_sk IS NULL + AND ws_sold_date_sk = d_date_sk + AND ws_item_sk = i_item_sk + UNION ALL + SELECT + 'catalog' AS channel, + cs_ship_addr_sk col_name, + d_year, + d_qoy, + i_category, + cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_ship_addr_sk IS NULL + AND cs_sold_date_sk = d_date_sk + AND 
cs_item_sk = i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q77.sql b/core/src/test/resources/tpcds-queries-double/q77.sql new file mode 100755 index 000000000..a69df9fbc --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q77.sql @@ -0,0 +1,100 @@ +WITH ss AS +(SELECT + s_store_sk, + sum(ss_ext_sales_price) AS sales, + sum(ss_net_profit) AS profit + FROM store_sales, date_dim, store + WHERE ss_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND ss_store_sk = s_store_sk + GROUP BY s_store_sk), + sr AS + (SELECT + s_store_sk, + sum(sr_return_amt) AS returns, + sum(sr_net_loss) AS profit_loss + FROM store_returns, date_dim, store + WHERE sr_returned_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND sr_store_sk = s_store_sk + GROUP BY s_store_sk), + cs AS + (SELECT + cs_call_center_sk, + sum(cs_ext_sales_price) AS sales, + sum(cs_net_profit) AS profit + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + GROUP BY cs_call_center_sk), + cr AS + (SELECT + sum(cr_return_amount) AS returns, + sum(cr_net_loss) AS profit_loss + FROM catalog_returns, date_dim + WHERE cr_returned_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days)), + ws AS + (SELECT + wp_web_page_sk, + sum(ws_ext_sales_price) AS sales, + sum(ws_net_profit) AS profit + FROM web_sales, date_dim, web_page + WHERE ws_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND ws_web_page_sk = wp_web_page_sk + GROUP BY wp_web_page_sk), + wr AS + 
(SELECT + wp_web_page_sk, + sum(wr_return_amt) AS returns, + sum(wr_net_loss) AS profit_loss + FROM web_returns, date_dim, web_page + WHERE wr_returned_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND wr_web_page_sk = wp_web_page_sk + GROUP BY wp_web_page_sk) +SELECT + channel, + id, + sum(sales) AS sales, + sum(returns) AS returns, + sum(profit) AS profit +FROM + (SELECT + 'store channel' AS channel, + ss.s_store_sk AS id, + sales, + coalesce(returns, 0) AS returns, + (profit - coalesce(profit_loss, 0)) AS profit + FROM ss + LEFT JOIN sr + ON ss.s_store_sk = sr.s_store_sk + UNION ALL + SELECT + 'catalog channel' AS channel, + cs_call_center_sk AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM cs, cr + UNION ALL + SELECT + 'web channel' AS channel, + ws.wp_web_page_sk AS id, + sales, + coalesce(returns, 0) returns, + (profit - coalesce(profit_loss, 0)) AS profit + FROM ws + LEFT JOIN wr + ON ws.wp_web_page_sk = wr.wp_web_page_sk + ) x +GROUP BY ROLLUP (channel, id) +ORDER BY channel, id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q78.sql b/core/src/test/resources/tpcds-queries-double/q78.sql new file mode 100755 index 000000000..07b0940e2 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q78.sql @@ -0,0 +1,64 @@ +WITH ws AS +(SELECT + d_year AS ws_sold_year, + ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + FROM web_sales + LEFT JOIN web_returns ON wr_order_number = ws_order_number AND ws_item_sk = wr_item_sk + JOIN date_dim ON ws_sold_date_sk = d_date_sk + WHERE wr_order_number IS NULL + GROUP BY d_year, ws_item_sk, ws_bill_customer_sk +), + cs AS + (SELECT + d_year AS cs_sold_year, + cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + FROM catalog_sales 
+ LEFT JOIN catalog_returns ON cr_order_number = cs_order_number AND cs_item_sk = cr_item_sk + JOIN date_dim ON cs_sold_date_sk = d_date_sk + WHERE cr_order_number IS NULL + GROUP BY d_year, cs_item_sk, cs_bill_customer_sk + ), + ss AS + (SELECT + d_year AS ss_sold_year, + ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + FROM store_sales + LEFT JOIN store_returns ON sr_ticket_number = ss_ticket_number AND ss_item_sk = sr_item_sk + JOIN date_dim ON ss_sold_date_sk = d_date_sk + WHERE sr_ticket_number IS NULL + GROUP BY d_year, ss_item_sk, ss_customer_sk + ) +SELECT + round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) ratio, + ss_qty store_qty, + ss_wc store_wholesale_cost, + ss_sp store_sales_price, + coalesce(ws_qty, 0) + coalesce(cs_qty, 0) other_chan_qty, + coalesce(ws_wc, 0) + coalesce(cs_wc, 0) other_chan_wholesale_cost, + coalesce(ws_sp, 0) + coalesce(cs_sp, 0) other_chan_sales_price +FROM ss + LEFT JOIN ws + ON (ws_sold_year = ss_sold_year AND ws_item_sk = ss_item_sk AND ws_customer_sk = ss_customer_sk) + LEFT JOIN cs + ON (cs_sold_year = ss_sold_year AND cs_item_sk = ss_item_sk AND cs_customer_sk = ss_customer_sk) +WHERE coalesce(ws_qty, 0) > 0 AND coalesce(cs_qty, 0) > 0 AND ss_sold_year = 2000 +ORDER BY + ratio, + ss_qty DESC, ss_wc DESC, ss_sp DESC, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q79.sql b/core/src/test/resources/tpcds-queries-double/q79.sql new file mode 100755 index 000000000..08f86dc20 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q79.sql @@ -0,0 +1,27 @@ +SELECT + c_last_name, + c_first_name, + substr(s_city, 1, 30), + ss_ticket_number, + amt, + profit +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + store.s_city, + sum(ss_coupon_amt) amt, + sum(ss_net_profit) profit + FROM store_sales, date_dim, 
store, household_demographics + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND (household_demographics.hd_dep_count = 6 OR + household_demographics.hd_vehicle_count > 2) + AND date_dim.d_dow = 1 + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_number_employees BETWEEN 200 AND 295 + GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, store.s_city) ms, customer +WHERE ss_customer_sk = c_customer_sk +ORDER BY c_last_name, c_first_name, substr(s_city, 1, 30), profit +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q8.sql b/core/src/test/resources/tpcds-queries-double/q8.sql new file mode 100755 index 000000000..497725111 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q8.sql @@ -0,0 +1,87 @@ +SELECT + s_store_name, + sum(ss_net_profit) +FROM store_sales, date_dim, store, + (SELECT ca_zip + FROM ( + (SELECT substr(ca_zip, 1, 5) ca_zip + FROM customer_address + WHERE substr(ca_zip, 1, 5) IN ( + '24128','76232','65084','87816','83926','77556','20548', + '26231','43848','15126','91137','61265','98294','25782', + '17920','18426','98235','40081','84093','28577','55565', + '17183','54601','67897','22752','86284','18376','38607', + '45200','21756','29741','96765','23932','89360','29839', + '25989','28898','91068','72550','10390','18845','47770', + '82636','41367','76638','86198','81312','37126','39192', + '88424','72175','81426','53672','10445','42666','66864', + '66708','41248','48583','82276','18842','78890','49448', + '14089','38122','34425','79077','19849','43285','39861', + '66162','77610','13695','99543','83444','83041','12305', + '57665','68341','25003','57834','62878','49130','81096', + '18840','27700','23470','50412','21195','16021','76107', + '71954','68309','18119','98359','64544','10336','86379', + '27068','39736','98569','28915','24206','56529','57647', + 
'54917','42961','91110','63981','14922','36420','23006', + '67467','32754','30903','20260','31671','51798','72325', + '85816','68621','13955','36446','41766','68806','16725', + '15146','22744','35850','88086','51649','18270','52867', + '39972','96976','63792','11376','94898','13595','10516', + '90225','58943','39371','94945','28587','96576','57855', + '28488','26105','83933','25858','34322','44438','73171', + '30122','34102','22685','71256','78451','54364','13354', + '45375','40558','56458','28286','45266','47305','69399', + '83921','26233','11101','15371','69913','35942','15882', + '25631','24610','44165','99076','33786','70738','26653', + '14328','72305','62496','22152','10144','64147','48425', + '14663','21076','18799','30450','63089','81019','68893', + '24996','51200','51211','45692','92712','70466','79994', + '22437','25280','38935','71791','73134','56571','14060', + '19505','72425','56575','74351','68786','51650','20004', + '18383','76614','11634','18906','15765','41368','73241', + '76698','78567','97189','28545','76231','75691','22246', + '51061','90578','56691','68014','51103','94167','57047', + '14867','73520','15734','63435','25733','35474','24676', + '94627','53535','17879','15559','53268','59166','11928', + '59402','33282','45721','43933','68101','33515','36634', + '71286','19736','58058','55253','67473','41918','19515', + '36495','19430','22351','77191','91393','49156','50298', + '87501','18652','53179','18767','63193','23968','65164', + '68880','21286','72823','58470','67301','13394','31016', + '70372','67030','40604','24317','45748','39127','26065', + '77721','31029','31880','60576','24671','45549','13376', + '50016','33123','19769','22927','97789','46081','72151', + '15723','46136','51949','68100','96888','64528','14171', + '79777','28709','11489','25103','32213','78668','22245', + '15798','27156','37930','62971','21337','51622','67853', + '10567','38415','15455','58263','42029','60279','37125', + 
'56240','88190','50308','26859','64457','89091','82136', + '62377','36233','63837','58078','17043','30010','60099', + '28810','98025','29178','87343','73273','30469','64034', + '39516','86057','21309','90257','67875','40162','11356', + '73650','61810','72013','30431','22461','19512','13375', + '55307','30625','83849','68908','26689','96451','38193', + '46820','88885','84935','69035','83144','47537','56616', + '94983','48033','69952','25486','61547','27385','61860', + '58048','56910','16807','17871','35258','31387','35458', + '35576')) + INTERSECT + (SELECT ca_zip + FROM + (SELECT + substr(ca_zip, 1, 5) ca_zip, + count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk AND + c_preferred_cust_flag = 'Y' + GROUP BY ca_zip + HAVING count(*) > 10) A1) + ) A2 + ) V1 +WHERE ss_store_sk = s_store_sk + AND ss_sold_date_sk = d_date_sk + AND d_qoy = 2 AND d_year = 1998 + AND (substr(s_zip, 1, 2) = substr(V1.ca_zip, 1, 2)) +GROUP BY s_store_name +ORDER BY s_store_name +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q80.sql b/core/src/test/resources/tpcds-queries-double/q80.sql new file mode 100755 index 000000000..433db87d2 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q80.sql @@ -0,0 +1,94 @@ +WITH ssr AS +(SELECT + s_store_id AS store_id, + sum(ss_ext_sales_price) AS sales, + sum(coalesce(sr_return_amt, 0)) AS returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) AS profit + FROM store_sales + LEFT OUTER JOIN store_returns ON + (ss_item_sk = sr_item_sk AND + ss_ticket_number = sr_ticket_number) + , + date_dim, store, item, promotion + WHERE ss_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) + AND ss_store_sk = s_store_sk + AND ss_item_sk = i_item_sk + AND i_current_price > 50 + AND ss_promo_sk = p_promo_sk + AND p_channel_tv = 'N' + GROUP BY s_store_id), + csr AS + (SELECT + cp_catalog_page_id AS catalog_page_id, + 
sum(cs_ext_sales_price) AS sales, + sum(coalesce(cr_return_amount, 0)) AS returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) AS profit + FROM catalog_sales + LEFT OUTER JOIN catalog_returns ON + (cs_item_sk = cr_item_sk AND + cs_order_number = cr_order_number) + , + date_dim, catalog_page, item, promotion + WHERE cs_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) + AND cs_catalog_page_sk = cp_catalog_page_sk + AND cs_item_sk = i_item_sk + AND i_current_price > 50 + AND cs_promo_sk = p_promo_sk + AND p_channel_tv = 'N' + GROUP BY cp_catalog_page_id), + wsr AS + (SELECT + web_site_id, + sum(ws_ext_sales_price) AS sales, + sum(coalesce(wr_return_amt, 0)) AS returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) AS profit + FROM web_sales + LEFT OUTER JOIN web_returns ON + (ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number) + , + date_dim, web_site, item, promotion + WHERE ws_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) + AND ws_web_site_sk = web_site_sk + AND ws_item_sk = i_item_sk + AND i_current_price > 50 + AND ws_promo_sk = p_promo_sk + AND p_channel_tv = 'N' + GROUP BY web_site_id) +SELECT + channel, + id, + sum(sales) AS sales, + sum(returns) AS returns, + sum(profit) AS profit +FROM (SELECT + 'store channel' AS channel, + concat('store', store_id) AS id, + sales, + returns, + profit + FROM ssr + UNION ALL + SELECT + 'catalog channel' AS channel, + concat('catalog_page', catalog_page_id) AS id, + sales, + returns, + profit + FROM csr + UNION ALL + SELECT + 'web channel' AS channel, + concat('web_site', web_site_id) AS id, + sales, + returns, + profit + FROM wsr) x +GROUP BY ROLLUP (channel, id) +ORDER BY channel, id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q81.sql b/core/src/test/resources/tpcds-queries-double/q81.sql new file mode 100755 index 
000000000..18f0ffa7e --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q81.sql @@ -0,0 +1,38 @@ +WITH customer_total_return AS +(SELECT + cr_returning_customer_sk AS ctr_customer_sk, + ca_state AS ctr_state, + sum(cr_return_amt_inc_tax) AS ctr_total_return + FROM catalog_returns, date_dim, customer_address + WHERE cr_returned_date_sk = d_date_sk + AND d_year = 2000 + AND cr_returning_addr_sk = ca_address_sk + GROUP BY cr_returning_customer_sk, ca_state ) +SELECT + c_customer_id, + c_salutation, + c_first_name, + c_last_name, + ca_street_number, + ca_street_name, + ca_street_type, + ca_suite_number, + ca_city, + ca_county, + ca_state, + ca_zip, + ca_country, + ca_gmt_offset, + ca_location_type, + ctr_total_return +FROM customer_total_return ctr1, customer_address, customer +WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 +FROM customer_total_return ctr2 +WHERE ctr1.ctr_state = ctr2.ctr_state) + AND ca_address_sk = c_current_addr_sk + AND ca_state = 'GA' + AND ctr1.ctr_customer_sk = c_customer_sk +ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name + , ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset + , ca_location_type, ctr_total_return +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q82.sql b/core/src/test/resources/tpcds-queries-double/q82.sql new file mode 100755 index 000000000..20942cfeb --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q82.sql @@ -0,0 +1,15 @@ +SELECT + i_item_id, + i_item_desc, + i_current_price +FROM item, inventory, date_dim, store_sales +WHERE i_current_price BETWEEN 62 AND 62 + 30 + AND inv_item_sk = i_item_sk + AND d_date_sk = inv_date_sk + AND d_date BETWEEN cast('2000-05-25' AS DATE) AND (cast('2000-05-25' AS DATE) + INTERVAL 60 days) + AND i_manufact_id IN (129, 270, 821, 423) + AND inv_quantity_on_hand BETWEEN 100 AND 500 + AND ss_item_sk = i_item_sk +GROUP BY i_item_id, 
i_item_desc, i_current_price +ORDER BY i_item_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q83.sql b/core/src/test/resources/tpcds-queries-double/q83.sql new file mode 100755 index 000000000..53c10c7de --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q83.sql @@ -0,0 +1,56 @@ +WITH sr_items AS +(SELECT + i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + FROM store_returns, item, date_dim + WHERE sr_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq IN + (SELECT d_week_seq + FROM date_dim + WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) + AND sr_returned_date_sk = d_date_sk + GROUP BY i_item_id), + cr_items AS + (SELECT + i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + FROM catalog_returns, item, date_dim + WHERE cr_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq IN + (SELECT d_week_seq + FROM date_dim + WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) + AND cr_returned_date_sk = d_date_sk + GROUP BY i_item_id), + wr_items AS + (SELECT + i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + FROM web_returns, item, date_dim + WHERE wr_item_sk = i_item_sk AND d_date IN + (SELECT d_date + FROM date_dim + WHERE d_week_seq IN + (SELECT d_week_seq + FROM date_dim + WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) + AND wr_returned_date_sk = d_date_sk + GROUP BY i_item_id) +SELECT + sr_items.item_id, + sr_item_qty, + sr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 sr_dev, + cr_item_qty, + cr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 cr_dev, + wr_item_qty, + wr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 wr_dev, + (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 average +FROM sr_items, cr_items, wr_items +WHERE sr_items.item_id = cr_items.item_id + AND sr_items.item_id = wr_items.item_id +ORDER BY sr_items.item_id, sr_item_qty +LIMIT 100 
diff --git a/core/src/test/resources/tpcds-queries-double/q84.sql b/core/src/test/resources/tpcds-queries-double/q84.sql new file mode 100755 index 000000000..a1076b57c --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q84.sql @@ -0,0 +1,19 @@ +SELECT + c_customer_id AS customer_id, + concat(c_last_name, ', ', c_first_name) AS customername +FROM customer + , customer_address + , customer_demographics + , household_demographics + , income_band + , store_returns +WHERE ca_city = 'Edgewood' + AND c_current_addr_sk = ca_address_sk + AND ib_lower_bound >= 38128 + AND ib_upper_bound <= 38128 + 50000 + AND ib_income_band_sk = hd_income_band_sk + AND cd_demo_sk = c_current_cdemo_sk + AND hd_demo_sk = c_current_hdemo_sk + AND sr_cdemo_sk = cd_demo_sk +ORDER BY c_customer_id +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q85.sql b/core/src/test/resources/tpcds-queries-double/q85.sql new file mode 100755 index 000000000..cf718b0f8 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q85.sql @@ -0,0 +1,82 @@ +SELECT + substr(r_reason_desc, 1, 20), + avg(ws_quantity), + avg(wr_refunded_cash), + avg(wr_fee) +FROM web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason +WHERE ws_web_page_sk = wp_web_page_sk + AND ws_item_sk = wr_item_sk + AND ws_order_number = wr_order_number + AND ws_sold_date_sk = d_date_sk AND d_year = 2000 + AND cd1.cd_demo_sk = wr_refunded_cdemo_sk + AND cd2.cd_demo_sk = wr_returning_cdemo_sk + AND ca_address_sk = wr_refunded_addr_sk + AND r_reason_sk = wr_reason_sk + AND + ( + ( + cd1.cd_marital_status = 'M' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = 'Advanced Degree' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 100.00 AND 150.00 + ) + OR + ( + cd1.cd_marital_status = 'S' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + 
cd1.cd_education_status = 'College' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 50.00 AND 100.00 + ) + OR + ( + cd1.cd_marital_status = 'W' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = '2 yr Degree' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 150.00 AND 200.00 + ) + ) + AND + ( + ( + ca_country = 'United States' + AND + ca_state IN ('IN', 'OH', 'NJ') + AND ws_net_profit BETWEEN 100 AND 200 + ) + OR + ( + ca_country = 'United States' + AND + ca_state IN ('WI', 'CT', 'KY') + AND ws_net_profit BETWEEN 150 AND 300 + ) + OR + ( + ca_country = 'United States' + AND + ca_state IN ('LA', 'IA', 'AR') + AND ws_net_profit BETWEEN 50 AND 250 + ) + ) +GROUP BY r_reason_desc +ORDER BY substr(r_reason_desc, 1, 20) + , avg(ws_quantity) + , avg(wr_refunded_cash) + , avg(wr_fee) +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q86.sql b/core/src/test/resources/tpcds-queries-double/q86.sql new file mode 100755 index 000000000..789a4abf7 --- /dev/null +++ b/core/src/test/resources/tpcds-queries-double/q86.sql @@ -0,0 +1,24 @@ +SELECT + sum(ws_net_paid) AS total_sum, + i_category, + i_class, + grouping(i_category) + grouping(i_class) AS lochierarchy, + rank() + OVER ( + PARTITION BY grouping(i_category) + grouping(i_class), + CASE WHEN grouping(i_class) = 0 + THEN i_category END + ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent +FROM + web_sales, date_dim d1, item +WHERE + d1.d_month_seq BETWEEN 1200 AND 1200 + 11 + AND d1.d_date_sk = ws_sold_date_sk + AND i_item_sk = ws_item_sk +GROUP BY ROLLUP (i_category, i_class) +ORDER BY + lochierarchy DESC, + CASE WHEN lochierarchy = 0 + THEN i_category END, + rank_within_parent +LIMIT 100 diff --git a/core/src/test/resources/tpcds-queries-double/q87.sql b/core/src/test/resources/tpcds-queries-double/q87.sql new file mode 100755 index 000000000..4aaa9f39d --- /dev/null +++ 
b/core/src/test/resources/tpcds-queries-double/q87.sql
@@ -0,0 +1,28 @@
+SELECT count(*) -- q87: distinct (last name, first name, date) store buyers in month_seq 1200..1211 absent from catalog and web sales
+FROM ((SELECT DISTINCT
+  c_last_name,
+  c_first_name,
+  d_date
+FROM store_sales, date_dim, customer
+WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
+  AND store_sales.ss_customer_sk = customer.c_customer_sk
+  AND d_month_seq BETWEEN 1200 AND 1200 + 11)
+      EXCEPT
+      (SELECT DISTINCT
+        c_last_name,
+        c_first_name,
+        d_date
+      FROM catalog_sales, date_dim, customer
+      WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
+        AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
+        AND d_month_seq BETWEEN 1200 AND 1200 + 11)
+      EXCEPT
+      (SELECT DISTINCT
+        c_last_name,
+        c_first_name,
+        d_date
+      FROM web_sales, date_dim, customer
+      WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk
+        AND web_sales.ws_bill_customer_sk = customer.c_customer_sk
+        AND d_month_seq BETWEEN 1200 AND 1200 + 11)
+     ) cool_cust
diff --git a/core/src/test/resources/tpcds-queries-double/q88.sql b/core/src/test/resources/tpcds-queries-double/q88.sql
new file mode 100755
index 000000000..25bcd90f4
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q88.sql
@@ -0,0 +1,122 @@
+SELECT * -- q88: eight store_sales counts for store name ese, one per half-hour slot from 8:30 to 12:30, cross-joined into one row
+FROM
+  (SELECT count(*) h8_30_to_9
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 8
+    AND time_dim.t_minute >= 30
+    AND (
+      (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s1,
+  (SELECT count(*) h9_to_9_30
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 9
+    AND time_dim.t_minute < 30
+    AND (
+      (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s2,
+  (SELECT count(*) h9_30_to_10
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 9
+    AND time_dim.t_minute >= 30
+    AND (
+      (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s3,
+  (SELECT count(*) h10_to_10_30
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 10
+    AND time_dim.t_minute < 30
+    AND (
+      (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s4,
+  (SELECT count(*) h10_30_to_11
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 10
+    AND time_dim.t_minute >= 30
+    AND (
+      (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s5,
+  (SELECT count(*) h11_to_11_30
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 11
+    AND time_dim.t_minute < 30
+    AND (
+      (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s6,
+  (SELECT count(*) h11_30_to_12
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 11
+    AND time_dim.t_minute >= 30
+    AND (
+      (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s7,
+  (SELECT count(*) h12_to_12_30
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 12
+    AND time_dim.t_minute < 30
+    AND (
+      (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s8
diff --git a/core/src/test/resources/tpcds-queries-double/q89.sql b/core/src/test/resources/tpcds-queries-double/q89.sql
new file mode 100755
index 000000000..75408cb03
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q89.sql
@@ -0,0 +1,30 @@
+SELECT * -- q89: 1999 monthly sales per category/class/brand/store/company that deviate more than 10 percent from the windowed average
+FROM (
+       SELECT
+         i_category,
+         i_class,
+         i_brand,
+         s_store_name,
+         s_company_name,
+         d_moy,
+         sum(ss_sales_price) sum_sales,
+         avg(sum(ss_sales_price))
+         OVER
+         (PARTITION BY i_category, i_brand, s_store_name, s_company_name)
+         avg_monthly_sales
+       FROM item, store_sales, date_dim, store
+       WHERE ss_item_sk = i_item_sk AND
+         ss_sold_date_sk = d_date_sk AND
+         ss_store_sk = s_store_sk AND
+         d_year IN (1999) AND
+         ((i_category IN ('Books', 'Electronics', 'Sports') AND
+           i_class IN ('computers', 'stereo', 'football'))
+          OR (i_category IN ('Men', 'Jewelry', 'Women') AND
+              i_class IN ('shirts', 'birdal', 'dresses')))
+       GROUP BY i_category, i_class, i_brand,
+         s_store_name, s_company_name, d_moy) tmp1
+WHERE CASE WHEN (avg_monthly_sales <> 0) -- the CASE guards the division against a zero average
+  THEN (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales)
+      ELSE NULL END > 0.1
+ORDER BY sum_sales - avg_monthly_sales, s_store_name
+LIMIT 100
diff --git a/core/src/test/resources/tpcds-queries-double/q9.sql b/core/src/test/resources/tpcds-queries-double/q9.sql
new file mode 100755
index 000000000..de3db9d98
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q9.sql
@@ -0,0 +1,48 @@
+SELECT -- q9: for five ss_quantity ranges, returns avg(ss_ext_discount_amt) when the range row count exceeds a threshold, else avg(ss_net_paid)
+  CASE WHEN (SELECT count(*)
+             FROM store_sales
+             WHERE ss_quantity BETWEEN 1 AND 20) > 62316685
+    THEN (SELECT avg(ss_ext_discount_amt)
+          FROM store_sales
+          WHERE ss_quantity BETWEEN 1 AND 20)
+  ELSE (SELECT avg(ss_net_paid)
+        FROM store_sales
+        WHERE ss_quantity BETWEEN 1 AND 20) END bucket1,
+  CASE WHEN (SELECT count(*)
+             FROM store_sales
+             WHERE ss_quantity BETWEEN 21 AND 40) > 19045798
+    THEN (SELECT avg(ss_ext_discount_amt)
+          FROM store_sales
+          WHERE ss_quantity BETWEEN 21 AND 40)
+  ELSE (SELECT avg(ss_net_paid)
+        FROM store_sales
+        WHERE ss_quantity BETWEEN 21 AND 40) END bucket2,
+  CASE WHEN (SELECT count(*)
+             FROM store_sales
+             WHERE ss_quantity BETWEEN 41 AND 60) > 365541424
+    THEN (SELECT avg(ss_ext_discount_amt)
+          FROM store_sales
+          WHERE ss_quantity BETWEEN 41 AND 60)
+  ELSE (SELECT avg(ss_net_paid)
+        FROM store_sales
+        WHERE ss_quantity BETWEEN 41 AND 60) END bucket3,
+  CASE WHEN (SELECT count(*)
+             FROM store_sales
+             WHERE ss_quantity BETWEEN 61 AND 80) > 216357808
+    THEN (SELECT avg(ss_ext_discount_amt)
+          FROM store_sales
+          WHERE ss_quantity BETWEEN 61 AND 80)
+  ELSE (SELECT avg(ss_net_paid)
+        FROM store_sales
+        WHERE ss_quantity BETWEEN 61 AND 80) END bucket4,
+  CASE WHEN (SELECT count(*)
+             FROM store_sales
+             WHERE ss_quantity BETWEEN 81 AND 100) > 184483884
+    THEN (SELECT avg(ss_ext_discount_amt)
+          FROM store_sales
+          WHERE ss_quantity BETWEEN 81 AND 100)
+  ELSE (SELECT avg(ss_net_paid)
+        FROM store_sales
+        WHERE ss_quantity BETWEEN 81 AND 100) END bucket5
+FROM reason
+WHERE r_reason_sk = 1
diff --git a/core/src/test/resources/tpcds-queries-double/q90.sql b/core/src/test/resources/tpcds-queries-double/q90.sql
new file mode 100755
index 000000000..2ecf7d571
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q90.sql
@@ -0,0 +1,19 @@
+SELECT cast(amc AS DOUBLE) / cast(pmc AS DOUBLE) am_pm_ratio -- q90: ratio of web sales counts for t_hour 8..9 versus t_hour 19..20
+FROM (SELECT count(*) amc
+      FROM web_sales, household_demographics, time_dim, web_page
+      WHERE ws_sold_time_sk = time_dim.t_time_sk
+        AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+        AND ws_web_page_sk = web_page.wp_web_page_sk
+        AND time_dim.t_hour BETWEEN 8 AND 8 + 1
+        AND household_demographics.hd_dep_count = 6
+        AND web_page.wp_char_count BETWEEN 5000 AND 5200) at,
+  (SELECT count(*) pmc
+   FROM web_sales, household_demographics, time_dim, web_page
+   WHERE ws_sold_time_sk = time_dim.t_time_sk
+     AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+     AND ws_web_page_sk = web_page.wp_web_page_sk
+     AND time_dim.t_hour BETWEEN 19 AND 19 + 1
+     AND household_demographics.hd_dep_count = 6
+     AND web_page.wp_char_count BETWEEN 5000 AND 5200) pt
+ORDER BY am_pm_ratio
+LIMIT 100
diff --git a/core/src/test/resources/tpcds-queries-double/q91.sql b/core/src/test/resources/tpcds-queries-double/q91.sql
new file mode 100755
index 000000000..9ca7ce00a
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q91.sql
@@ -0,0 +1,23 @@
+SELECT -- q91: summed catalog return net loss per call center for d_year 1998, d_moy 11 under the demographic filters below
+  cc_call_center_id Call_Center,
+  cc_name Call_Center_Name,
+  cc_manager Manager,
+  sum(cr_net_loss) Returns_Loss
+FROM
+  call_center, catalog_returns, date_dim, customer, customer_address,
+  customer_demographics, household_demographics
+WHERE
+  cr_call_center_sk = cc_call_center_sk
+    AND cr_returned_date_sk = d_date_sk
+    AND cr_returning_customer_sk = c_customer_sk
+    AND cd_demo_sk = c_current_cdemo_sk
+    AND hd_demo_sk = c_current_hdemo_sk
+    AND ca_address_sk = c_current_addr_sk
+    AND d_year = 1998
+    AND d_moy = 11
+    AND ((cd_marital_status = 'M' AND cd_education_status = 'Unknown')
+         OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree'))
+    AND hd_buy_potential LIKE 'Unknown%'
+    AND ca_gmt_offset = -7
+GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status
+ORDER BY sum(cr_net_loss) DESC
diff --git a/core/src/test/resources/tpcds-queries-double/q92.sql b/core/src/test/resources/tpcds-queries-double/q92.sql
new file mode 100755
index 000000000..99129c3bd
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q92.sql
@@ -0,0 +1,16 @@
+SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount ` -- q92: web discounts for i_manufact_id 350 exceeding 1.3x the per-item average over the same 90-day window
+FROM web_sales, item, date_dim
+WHERE i_manufact_id = 350
+  AND i_item_sk = ws_item_sk
+  AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days)
+  AND d_date_sk = ws_sold_date_sk
+  AND ws_ext_discount_amt >
+  (
+    SELECT 1.3 * avg(ws_ext_discount_amt)
+    FROM web_sales, date_dim
+    WHERE ws_item_sk = i_item_sk
+      AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days)
+      AND d_date_sk = ws_sold_date_sk
+  )
+ORDER BY sum(ws_ext_discount_amt)
+LIMIT 100
diff --git a/core/src/test/resources/tpcds-queries-double/q93.sql b/core/src/test/resources/tpcds-queries-double/q93.sql
new file mode 100755
index 000000000..222dc31c1
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q93.sql
@@ -0,0 +1,19 @@
+SELECT -- q93: per-customer sales amount net of returned quantity, limited to returns whose reason description is reason 28
+  ss_customer_sk,
+  sum(act_sales) sumsales
+FROM (SELECT
+        ss_item_sk,
+        ss_ticket_number,
+        ss_customer_sk,
+        CASE WHEN sr_return_quantity IS NOT NULL
+          THEN (ss_quantity - sr_return_quantity) * ss_sales_price
+        ELSE (ss_quantity * ss_sales_price) END act_sales
+      FROM store_sales
+        LEFT OUTER JOIN store_returns
+          ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number)
+        ,
+        reason
+      WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 28') t
+GROUP BY ss_customer_sk
+ORDER BY sumsales, ss_customer_sk
+LIMIT 100
diff --git a/core/src/test/resources/tpcds-queries-double/q94.sql b/core/src/test/resources/tpcds-queries-double/q94.sql
new file mode 100755
index 000000000..d6de3d75b
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q94.sql
@@ -0,0 +1,23 @@
+SELECT -- q94: web orders shipped to IL in the 60-day window from company pri that used more than one warehouse and have no web_returns row
+  count(DISTINCT ws_order_number) AS `order count `,
+  sum(ws_ext_ship_cost) AS `total shipping cost `,
+  sum(ws_net_profit) AS `total net profit `
+FROM
+  web_sales ws1, date_dim, customer_address, web_site
+WHERE
+  d_date BETWEEN '1999-02-01' AND
+  (CAST('1999-02-01' AS DATE) + INTERVAL 60 days)
+    AND ws1.ws_ship_date_sk = d_date_sk
+    AND ws1.ws_ship_addr_sk = ca_address_sk
+    AND ca_state = 'IL'
+    AND ws1.ws_web_site_sk = web_site_sk
+    AND web_company_name = 'pri'
+    AND EXISTS(SELECT *
+               FROM web_sales ws2
+               WHERE ws1.ws_order_number = ws2.ws_order_number
+                 AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+    AND NOT EXISTS(SELECT *
+                   FROM web_returns wr1
+                   WHERE ws1.ws_order_number = wr1.wr_order_number)
+ORDER BY count(DISTINCT ws_order_number)
+LIMIT 100
diff --git a/core/src/test/resources/tpcds-queries-double/q95.sql b/core/src/test/resources/tpcds-queries-double/q95.sql
new file mode 100755
index 000000000..df71f00bd
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q95.sql
@@ -0,0 +1,29 @@
+WITH ws_wh AS -- q95: ws_wh holds orders served from two different warehouses; the main query keeps those that also appear in web_returns
+(SELECT
+    ws1.ws_order_number,
+    ws1.ws_warehouse_sk wh1,
+    ws2.ws_warehouse_sk wh2
+  FROM web_sales ws1, web_sales ws2
+  WHERE ws1.ws_order_number = ws2.ws_order_number
+    AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+SELECT
+  count(DISTINCT ws_order_number) AS `order count `,
+  sum(ws_ext_ship_cost) AS `total shipping cost `,
+  sum(ws_net_profit) AS `total net profit `
+FROM
+  web_sales ws1, date_dim, customer_address, web_site
+WHERE
+  d_date BETWEEN '1999-02-01' AND
+  (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY)
+    AND ws1.ws_ship_date_sk = d_date_sk
+    AND ws1.ws_ship_addr_sk = ca_address_sk
+    AND ca_state = 'IL'
+    AND ws1.ws_web_site_sk = web_site_sk
+    AND web_company_name = 'pri'
+    AND ws1.ws_order_number IN (SELECT ws_order_number
+                                FROM ws_wh)
+    AND ws1.ws_order_number IN (SELECT wr_order_number
+                                FROM web_returns, ws_wh
+                                WHERE wr_order_number = ws_wh.ws_order_number)
+ORDER BY count(DISTINCT ws_order_number)
+LIMIT 100
diff --git a/core/src/test/resources/tpcds-queries-double/q96.sql b/core/src/test/resources/tpcds-queries-double/q96.sql
new file mode 100755
index 000000000..7ab17e7bc
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q96.sql
@@ -0,0 +1,11 @@
+SELECT count(*) -- q96: store sales at store name ese with t_hour 20, t_minute >= 30 and hd_dep_count 7
+FROM store_sales, household_demographics, time_dim, store
+WHERE ss_sold_time_sk = time_dim.t_time_sk
+  AND ss_hdemo_sk = household_demographics.hd_demo_sk
+  AND ss_store_sk = s_store_sk
+  AND time_dim.t_hour = 20
+  AND time_dim.t_minute >= 30
+  AND household_demographics.hd_dep_count = 7
+  AND store.s_store_name = 'ese'
+ORDER BY count(*)
+LIMIT 100
diff
--git a/core/src/test/resources/tpcds-queries-double/q97.sql b/core/src/test/resources/tpcds-queries-double/q97.sql
new file mode 100755
index 000000000..e7e0b1a05
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q97.sql
@@ -0,0 +1,30 @@
+WITH ssci AS ( -- q97: counts customer/item pairs seen only in store sales, only in catalog sales, or in both, for month_seq 1200..1211
+  SELECT
+    ss_customer_sk customer_sk,
+    ss_item_sk item_sk
+  FROM store_sales, date_dim
+  WHERE ss_sold_date_sk = d_date_sk
+    AND d_month_seq BETWEEN 1200 AND 1200 + 11
+  GROUP BY ss_customer_sk, ss_item_sk),
+    csci AS (
+    SELECT
+      cs_bill_customer_sk customer_sk,
+      cs_item_sk item_sk
+    FROM catalog_sales, date_dim
+    WHERE cs_sold_date_sk = d_date_sk
+      AND d_month_seq BETWEEN 1200 AND 1200 + 11
+    GROUP BY cs_bill_customer_sk, cs_item_sk)
+SELECT
+  sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL
+    THEN 1
+      ELSE 0 END) store_only,
+  sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL
+    THEN 1
+      ELSE 0 END) catalog_only,
+  sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL
+    THEN 1
+      ELSE 0 END) store_and_catalog
+FROM ssci
+  FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk
+    AND ssci.item_sk = csci.item_sk)
+LIMIT 100
diff --git a/core/src/test/resources/tpcds-queries-double/q98.sql b/core/src/test/resources/tpcds-queries-double/q98.sql
new file mode 100755
index 000000000..bb10d4bf8
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q98.sql
@@ -0,0 +1,21 @@
+SELECT -- q98: store item revenue and its percentage of the i_class total for Sports, Books and Home over the 30-day window
+  i_item_desc,
+  i_category,
+  i_class,
+  i_current_price,
+  sum(ss_ext_sales_price) AS itemrevenue,
+  sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price))
+  OVER
+  (PARTITION BY i_class) AS revenueratio
+FROM
+  store_sales, item, date_dim
+WHERE
+  ss_item_sk = i_item_sk
+    AND i_category IN ('Sports', 'Books', 'Home')
+    AND ss_sold_date_sk = d_date_sk
+    AND d_date BETWEEN cast('1999-02-22' AS DATE)
+  AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days)
+GROUP BY
+  i_item_id, i_item_desc, i_category, i_class, i_current_price
+ORDER BY
+  i_category, i_class, i_item_id, i_item_desc, revenueratio
diff --git a/core/src/test/resources/tpcds-queries-double/q99.sql b/core/src/test/resources/tpcds-queries-double/q99.sql
new file mode 100755
index 000000000..f1a3d4d2b
--- /dev/null
+++ b/core/src/test/resources/tpcds-queries-double/q99.sql
@@ -0,0 +1,34 @@
+SELECT -- q99: catalog shipments bucketed by (ship date sk - sold date sk) per warehouse name prefix, ship mode type and call center
+  substr(w_warehouse_name, 1, 20),
+  sm_type,
+  cc_name,
+  sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30)
+    THEN 1
+      ELSE 0 END) AS `30 days `,
+  sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND
+                (cs_ship_date_sk - cs_sold_date_sk <= 60)
+    THEN 1
+      ELSE 0 END) AS `31 - 60 days `,
+  sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND
+                (cs_ship_date_sk - cs_sold_date_sk <= 90)
+    THEN 1
+      ELSE 0 END) AS `61 - 90 days `,
+  sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND
+                (cs_ship_date_sk - cs_sold_date_sk <= 120)
+    THEN 1
+      ELSE 0 END) AS `91 - 120 days `,
+  sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120)
+    THEN 1
+      ELSE 0 END) AS `>120 days `
+FROM
+  catalog_sales, warehouse, ship_mode, call_center, date_dim
+WHERE
+  d_month_seq BETWEEN 1200 AND 1200 + 11
+    AND cs_ship_date_sk = d_date_sk
+    AND cs_warehouse_sk = w_warehouse_sk
+    AND cs_ship_mode_sk = sm_ship_mode_sk
+    AND cs_call_center_sk = cc_call_center_sk
+GROUP BY
+  substr(w_warehouse_name, 1, 20), sm_type, cc_name
+ORDER BY substr(w_warehouse_name, 1, 20), sm_type, cc_name
+LIMIT 100
diff --git a/core/src/test/resources/tpch-queries/q1.sql b/core/src/test/resources/tpch-queries-double/q1.sql
similarity index 100%
rename from core/src/test/resources/tpch-queries/q1.sql
rename to core/src/test/resources/tpch-queries-double/q1.sql
diff --git a/core/src/test/resources/tpch-queries/q10.sql b/core/src/test/resources/tpch-queries-double/q10.sql
similarity index 100%
rename from core/src/test/resources/tpch-queries/q10.sql
rename to core/src/test/resources/tpch-queries-double/q10.sql
diff --git a/core/src/test/resources/tpch-queries/q11.sql
b/core/src/test/resources/tpch-queries-double/q11.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q11.sql rename to core/src/test/resources/tpch-queries-double/q11.sql diff --git a/core/src/test/resources/tpch-queries/q12.sql b/core/src/test/resources/tpch-queries-double/q12.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q12.sql rename to core/src/test/resources/tpch-queries-double/q12.sql diff --git a/core/src/test/resources/tpch-queries/q13.sql b/core/src/test/resources/tpch-queries-double/q13.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q13.sql rename to core/src/test/resources/tpch-queries-double/q13.sql diff --git a/core/src/test/resources/tpch-queries/q14.sql b/core/src/test/resources/tpch-queries-double/q14.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q14.sql rename to core/src/test/resources/tpch-queries-double/q14.sql diff --git a/core/src/test/resources/tpch-queries/q15.sql b/core/src/test/resources/tpch-queries-double/q15.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q15.sql rename to core/src/test/resources/tpch-queries-double/q15.sql diff --git a/core/src/test/resources/tpch-queries/q16.sql b/core/src/test/resources/tpch-queries-double/q16.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q16.sql rename to core/src/test/resources/tpch-queries-double/q16.sql diff --git a/core/src/test/resources/tpch-queries/q17.sql b/core/src/test/resources/tpch-queries-double/q17.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q17.sql rename to core/src/test/resources/tpch-queries-double/q17.sql diff --git a/core/src/test/resources/tpch-queries/q18.sql b/core/src/test/resources/tpch-queries-double/q18.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q18.sql rename to core/src/test/resources/tpch-queries-double/q18.sql diff --git 
a/core/src/test/resources/tpch-queries/q19.sql b/core/src/test/resources/tpch-queries-double/q19.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q19.sql rename to core/src/test/resources/tpch-queries-double/q19.sql diff --git a/core/src/test/resources/tpch-queries/q2.sql b/core/src/test/resources/tpch-queries-double/q2.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q2.sql rename to core/src/test/resources/tpch-queries-double/q2.sql diff --git a/core/src/test/resources/tpch-queries/q20.sql b/core/src/test/resources/tpch-queries-double/q20.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q20.sql rename to core/src/test/resources/tpch-queries-double/q20.sql diff --git a/core/src/test/resources/tpch-queries/q21.sql b/core/src/test/resources/tpch-queries-double/q21.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q21.sql rename to core/src/test/resources/tpch-queries-double/q21.sql diff --git a/core/src/test/resources/tpch-queries/q22.sql b/core/src/test/resources/tpch-queries-double/q22.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q22.sql rename to core/src/test/resources/tpch-queries-double/q22.sql diff --git a/core/src/test/resources/tpch-queries/q3.sql b/core/src/test/resources/tpch-queries-double/q3.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q3.sql rename to core/src/test/resources/tpch-queries-double/q3.sql diff --git a/core/src/test/resources/tpch-queries/q4.sql b/core/src/test/resources/tpch-queries-double/q4.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q4.sql rename to core/src/test/resources/tpch-queries-double/q4.sql diff --git a/core/src/test/resources/tpch-queries/q5.sql b/core/src/test/resources/tpch-queries-double/q5.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q5.sql rename to core/src/test/resources/tpch-queries-double/q5.sql 
diff --git a/core/src/test/resources/tpch-queries/q6.sql b/core/src/test/resources/tpch-queries-double/q6.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q6.sql rename to core/src/test/resources/tpch-queries-double/q6.sql diff --git a/core/src/test/resources/tpch-queries/q7.sql b/core/src/test/resources/tpch-queries-double/q7.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q7.sql rename to core/src/test/resources/tpch-queries-double/q7.sql diff --git a/core/src/test/resources/tpch-queries/q8.sql b/core/src/test/resources/tpch-queries-double/q8.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q8.sql rename to core/src/test/resources/tpch-queries-double/q8.sql diff --git a/core/src/test/resources/tpch-queries/q9.sql b/core/src/test/resources/tpch-queries-double/q9.sql similarity index 100% rename from core/src/test/resources/tpch-queries/q9.sql rename to core/src/test/resources/tpch-queries-double/q9.sql diff --git a/core/src/test/scala/com/intel/oap/tpc/ds/TPCDSSuite.scala b/core/src/test/scala/com/intel/oap/tpc/ds/TPCDSSuite.scala new file mode 100644 index 000000000..cca9be0df --- /dev/null +++ b/core/src/test/scala/com/intel/oap/tpc/ds/TPCDSSuite.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.oap.tpc.ds
+
+import com.intel.oap.tpc.util.TPCRunner
+import org.apache.log4j.{Level, LogManager}
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * Runs TPC-DS queries from the `tpcds-queries-double` resource directory
+ * against tables produced by `TPCDSTableGen` in a shared Spark session.
+ */
+class TPCDSSuite extends QueryTest with SharedSparkSession {
+
+  private val MAX_DIRECT_MEMORY = "6g"
+  private val TPCDS_QUERIES_RESOURCE = "tpcds-queries-double"
+  private val TPCDS_WRITE_PATH = "/tmp/tpcds-generated"
+
+  private var runner: TPCRunner = _
+
+  /**
+   * Builds the session configuration on top of the inherited one.
+   *
+   * The entries register the `com.intel.oap.ColumnarPlugin` extension and the
+   * columnar shuffle manager, switch off whole-stage codegen and adaptive
+   * execution, and raise the heartbeat/network timeouts.
+   *
+   * @return the inherited `SparkConf` with every entry below applied in order
+   */
+  override protected def sparkConf: SparkConf = {
+    val settings: Seq[(String, String)] = Seq(
+      "spark.memory.offHeap.size" -> MAX_DIRECT_MEMORY,
+      "spark.sql.extensions" -> "com.intel.oap.ColumnarPlugin",
+      "spark.sql.codegen.wholeStage" -> "false",
+      "spark.sql.sources.useV1SourceList" -> "",
+      "spark.sql.columnar.tmp_dir" -> "/tmp/",
+      "spark.sql.adaptive.enabled" -> "false",
+      "spark.sql.columnar.sort.broadcastJoin" -> "true",
+      "spark.storage.blockManagerSlaveTimeoutMs" -> "3600000",
+      "spark.executor.heartbeatInterval" -> "3600000",
+      "spark.network.timeout" -> "3601s",
+      "spark.oap.sql.columnar.preferColumnar" -> "true",
+      "spark.sql.columnar.codegen.hashAggregate" -> "false",
+      "spark.sql.columnar.sort" -> "true",
+      "spark.sql.columnar.window" -> "true",
+      "spark.shuffle.manager" -> "org.apache.spark.shuffle.sort.ColumnarShuffleManager",
+      "spark.unsafe.exceptionOnMemoryLeak" -> "false",
+      "spark.network.io.preferDirectBufs" -> "false"
+    )
+    val conf = super.sparkConf
+    settings.foreach { case (key, value) => conf.set(key, value) }
+    conf
+  }
+
+
+  /**
+   * After the shared session is up: lowers the root logger to WARN, generates
+   * the tables under `TPCDS_WRITE_PATH` (passing 0.1D as the scale argument)
+   * and creates the query runner.
+   */
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    LogManager.getRootLogger.setLevel(Level.WARN)
+    val tableGen = new TPCDSTableGen(spark, 0.1D, TPCDS_WRITE_PATH)
+    tableGen.gen()
+    runner = new TPCRunner(spark, TPCDS_QUERIES_RESOURCE)
+  }
+
+  /** No suite-specific cleanup; delegates to the parent. */
+  override def afterAll(): Unit = {
+    super.afterAll()
+  }
+
+  test("window queries") {
+    // Same queries, same order, same arguments as before; a failure still aborts the rest.
+    val windowQueries = Seq(
+      "q12", "q20", "q36", "q44", "q47", "q49", "q51", "q53",
+      "q57", "q63", "q67", "q70", "q86", "q89", "q98")
+    windowQueries.foreach { query =>
+      runner.runTPCQuery(query, 1, true)
+    }
+  }
+}
+
+object TPCDSSuite {
+  /** Prints `line` to stdout prefixed with the `[RAM Reporter]` tag. */
+  def stdoutLog(line: Any): Unit = {
+    val message = "[RAM Reporter] %s".format(line)
+    println(message)
+  }
+}
diff --git a/core/src/test/scala/com/intel/oap/tpc/ds/TPCDSTableGen.scala b/core/src/test/scala/com/intel/oap/tpc/ds/TPCDSTableGen.scala
new file mode 100644
index 000000000..695ee43be
--- /dev/null
+++ b/core/src/test/scala/com/intel/oap/tpc/ds/TPCDSTableGen.scala
@@ -0,0 +1,676 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.oap.tpc.ds
+
+import java.io.{File, IOException}
+
+import com.intel.oap.tpc.ds.TPCDSTableGen._
+import io.trino.tpcds.Results.constructResults
+import io.trino.tpcds._
+import org.apache.spark.sql.catalyst.encoders.RowEncoder
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Column, Row, SparkSession}
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable.ListBuffer
+import scala.util.control.NonFatal
+
+/**
+ * Generates TPC-DS tables with the `io.trino.tpcds` generator, writes each one
+ * as Parquet below `path`, and registers every resulting directory as a
+ * catalog table.
+ *
+ * @param spark session used for writing the data and registering the tables
+ * @param scale TPC-DS scale factor handed to the generator
+ * @param path  output directory; each table is written to `path/<table name>`
+ */
+class TPCDSTableGen(val spark: SparkSession, scale: Double, path: String) extends Serializable {
+
+  /**
+   * Writes the rows of the table called `name` using the schema registered
+   * for that name.
+   *
+   * The `dbgen_version` table is skipped. A name without a schema below fails
+   * with a `MatchError`, because the match has no default case.
+   *
+   * @param name TPC-DS table name
+   * @param rows generated rows of that table
+   */
+  def writeParquetTable(name: String, rows: List[Row]): Unit = {
+    if (name.equals("dbgen_version")) {
+      return
+    }
+    val schema = name match {
+      case "catalog_sales" => catalogSalesSchema
+      case "catalog_returns" => catalogReturnsSchema
+      case "inventory" => inventorySchema
+      case "store_sales" => storeSalesSchema
+      case "store_returns" => storeReturnsSchema
+      case "web_sales" => webSalesSchema
+      case "web_returns" => webReturnsSchema
+      case "call_center" => callCenterSchema
+      case "catalog_page" => catalogPageSchema
+      case "customer" => customerSchema
+      case "customer_address" => customerAddressSchema
+      case "customer_demographics" => customerDemographicsSchema
+      case "date_dim" => dateDimSchema
+      case "household_demographics" => householdDemographicsSchema
+      case "income_band" => incomeBandSchema
+      case "item" => itemSchema
+      case "promotion" => promotionSchema
+      case "reason" => reasonSchema
+      case "ship_mode" => shipModeSchema
+      case "store" => storeSchema
+      case "time_dim" => timeDimSchema
+      case "warehouse" => warehouseSchema
+      case "web_page" => webPageSchema
+      case "web_site" => webSiteSchema
+    }
+    writeParquetTable(name, rows, schema)
+  }
+
+  /**
+   * Writes `rows` to `path/tableName` as Parquet, replacing existing output.
+   * Nothing is written for an empty row list.
+   *
+   * @param tableName sub-directory name below `path`
+   * @param rows      generated rows
+   * @param schema    target column names and types
+   */
+  private def writeParquetTable(tableName: String, rows: List[Row], schema: StructType): Unit = {
+    if (rows.isEmpty) {
+      return
+    }
+
+    // A single-partition range of rows.size elements is used only as a driver for handing
+    // out the in-memory rows one by one. The encoder declares every column as StringType
+    // (presumably the generator emits string values - TODO confirm).
+    val stringData = spark.range(0L, rows.size, 1L, 1)
+        .mapPartitions { itr =>
+          val rowItr = rows.iterator
+          itr.map { _ =>
+            rowItr.next()
+          }
+        }(RowEncoder(StructType(schema.fields.map(f => StructField(f.name, StringType)))))
+
+    // Cast each string column to the type declared in the target schema.
+    val convertedData = {
+      val columns = schema.fields.map { f =>
+        new Column(f.name).cast(f.dataType).as(f.name)
+      }
+      stringData.select(columns: _*)
+    }
+
+    convertedData.coalesce(1)
+        .write
+        .format("parquet")
+        .mode("overwrite")
+        .save(path + File.separator + tableName)
+  }
+
+  /**
+   * Generates every base table (and its child table, when it has one) at the
+   * configured `scale`, writes them below `path`, then registers each entry
+   * found in `path` as an "arrow" catalog table named after the entry.
+   */
+  def gen(): Unit = {
+    val options = new Options()
+    // Use the scale factor requested by the caller; it was previously ignored in favour
+    // of a fixed 0.01D literal.
+    options.scale = scale
+    val session = options.toSession
+    val tableGenerator = new Gen(session)
+    Table.getBaseTables.forEach { t =>
+      val (p, c): (List[Row], List[Row]) = tableGenerator.generateSparkRows(t)
+      writeParquetTable(t.getName, p)
+      if (t.hasChild) {
+        writeParquetTable(t.getChild.getName, c)
+      }
+    }
+
+    val files = new File(path).listFiles()
+    files.foreach(file => {
+      println("Creating catalog table: " + file.getName)
+      spark.catalog.createTable(file.getName, file.getAbsolutePath, "arrow")
+      try {
+        spark.catalog.recoverPartitions(file.getName)
+      } catch {
+        // Best effort: a failed partition recovery is ignored on purpose, but fatal JVM
+        // errors (for example OutOfMemoryError) are no longer swallowed.
+        case NonFatal(_) =>
+      }
+    })
+  }
+}
+
+object TPCDSTableGen {
+
+  /** Table generator that returns rows in memory instead of writing files. */
+  class Gen(session: Session) extends TableGenerator(session) {
+
+    /** File-based generation is not supported by this generator. */
+    override def generateTable(table: Table): Unit = {
+      throw new UnsupportedOperationException
+    }
+
+    /**
+     * Generates the rows of `table`.
+     *
+     * @param table table to generate
+     * @return (parent rows, child rows); both lists are empty when `table` is a child
+     *         table and the session is not restricted to a single table
+     */
+    def generateSparkRows(table: Table): (List[Row], List[Row]) = {
+      if (table.isChild && !session.generateOnlyOneTable) {
+        return (List.empty, List.empty)
+      }
+      val parentRows = ListBuffer[Row]()
+      val childRows = ListBuffer[Row]()
+      try {
+        val results = constructResults(table, session)
+        // Each result entry holds the parent row at index 0 and, when present, the
+        // matching child row at index 1.
+        for (parentAndChildRows <- results.asScala) {
+          if (parentAndChildRows.size > 0) {
+            val parentRow = parentAndChildRows.get(0).asScala
+            parentRows.append(Row.fromSeq(parentRow))
+          }
+          if (parentAndChildRows.size > 1) {
+            val childRow = parentAndChildRows.get(1).asScala
+            childRows.append(Row.fromSeq(childRow))
+          }
+        }
+      } catch {
+        case e: IOException =>
+          // Same exception type and message as before, with the original exception
+          // attached as the cause so its stack trace is not lost.
+          val wrapped = new TpcdsException(e.getMessage)
+          wrapped.initCause(e)
+          throw wrapped
+      }
+      (parentRows.toList, childRows.toList)
+    }
+  }
+
+  // generated by
script
+  private def catalogSalesSchema = { // target column types for "catalog_sales": LongType keys/quantity first, then DoubleType amounts
+    StructType(Seq(
+      StructField("cs_sold_date_sk", LongType),
+      StructField("cs_sold_time_sk", LongType),
+      StructField("cs_ship_date_sk", LongType),
+      StructField("cs_bill_customer_sk", LongType),
+      StructField("cs_bill_cdemo_sk", LongType),
+      StructField("cs_bill_hdemo_sk", LongType),
+      StructField("cs_bill_addr_sk", LongType),
+      StructField("cs_ship_customer_sk", LongType),
+      StructField("cs_ship_cdemo_sk", LongType),
+      StructField("cs_ship_hdemo_sk", LongType),
+      StructField("cs_ship_addr_sk", LongType),
+      StructField("cs_call_center_sk", LongType),
+      StructField("cs_catalog_page_sk", LongType),
+      StructField("cs_ship_mode_sk", LongType),
+      StructField("cs_warehouse_sk", LongType),
+      StructField("cs_item_sk", LongType),
+      StructField("cs_promo_sk", LongType),
+      StructField("cs_order_number", LongType),
+      StructField("cs_quantity", LongType),
+      StructField("cs_wholesale_cost", DoubleType),
+      StructField("cs_list_price", DoubleType),
+      StructField("cs_sales_price", DoubleType),
+      StructField("cs_ext_discount_amt", DoubleType),
+      StructField("cs_ext_sales_price", DoubleType),
+      StructField("cs_ext_wholesale_cost", DoubleType),
+      StructField("cs_ext_list_price", DoubleType),
+      StructField("cs_ext_tax", DoubleType),
+      StructField("cs_coupon_amt", DoubleType),
+      StructField("cs_ext_ship_cost", DoubleType),
+      StructField("cs_net_paid", DoubleType),
+      StructField("cs_net_paid_inc_tax", DoubleType),
+      StructField("cs_net_paid_inc_ship", DoubleType),
+      StructField("cs_net_paid_inc_ship_tax", DoubleType),
+      StructField("cs_net_profit", DoubleType)
+    ))
+  }
+  private def catalogReturnsSchema = { // target column types for "catalog_returns"
+    StructType(Seq(
+      StructField("cr_returned_date_sk", LongType),
+      StructField("cr_returned_time_sk", LongType),
+      StructField("cr_item_sk", LongType),
+      StructField("cr_refunded_customer_sk", LongType),
+      StructField("cr_refunded_cdemo_sk", LongType),
+      StructField("cr_refunded_hdemo_sk", LongType),
+      StructField("cr_refunded_addr_sk", LongType),
+      StructField("cr_returning_customer_sk", LongType),
+      StructField("cr_returning_cdemo_sk", LongType),
+      StructField("cr_returning_hdemo_sk", LongType),
+      StructField("cr_returning_addr_sk", LongType),
+      StructField("cr_call_center_sk", LongType),
+      StructField("cr_catalog_page_sk", LongType),
+      StructField("cr_ship_mode_sk", LongType),
+      StructField("cr_warehouse_sk", LongType),
+      StructField("cr_reason_sk", LongType),
+      StructField("cr_order_number", LongType),
+      StructField("cr_return_quantity", LongType),
+      StructField("cr_return_amount", DoubleType),
+      StructField("cr_return_tax", DoubleType),
+      StructField("cr_return_amt_inc_tax", DoubleType),
+      StructField("cr_fee", DoubleType),
+      StructField("cr_return_ship_cost", DoubleType),
+      StructField("cr_refunded_cash", DoubleType),
+      StructField("cr_reversed_charge", DoubleType),
+      StructField("cr_store_credit", DoubleType),
+      StructField("cr_net_loss", DoubleType)
+    ))
+  }
+  private def inventorySchema = { // target column types for "inventory": all four columns are LongType
+    StructType(Seq(
+      StructField("inv_date_sk", LongType),
+      StructField("inv_item_sk", LongType),
+      StructField("inv_warehouse_sk", LongType),
+      StructField("inv_quantity_on_hand", LongType)
+    ))
+  }
+  private def storeSalesSchema = { // target column types for "store_sales"
+    StructType(Seq(
+      StructField("ss_sold_date_sk", LongType),
+      StructField("ss_sold_time_sk", LongType),
+      StructField("ss_item_sk", LongType),
+      StructField("ss_customer_sk", LongType),
+      StructField("ss_cdemo_sk", LongType),
+      StructField("ss_hdemo_sk", LongType),
+      StructField("ss_addr_sk", LongType),
+      StructField("ss_store_sk", LongType),
+      StructField("ss_promo_sk", LongType),
+      StructField("ss_ticket_number", LongType),
+      StructField("ss_quantity", LongType),
+      StructField("ss_wholesale_cost", DoubleType),
+      StructField("ss_list_price", DoubleType),
+      StructField("ss_sales_price", DoubleType),
+      StructField("ss_ext_discount_amt", DoubleType),
+      StructField("ss_ext_sales_price", DoubleType),
+      StructField("ss_ext_wholesale_cost", DoubleType),
+      StructField("ss_ext_list_price", DoubleType),
+      StructField("ss_ext_tax", DoubleType),
+      StructField("ss_coupon_amt", DoubleType),
+      StructField("ss_net_paid", DoubleType),
+      StructField("ss_net_paid_inc_tax", DoubleType),
+      StructField("ss_net_profit", DoubleType)
+    ))
+  }
+  private def storeReturnsSchema = { // target column types for "store_returns"
+    StructType(Seq(
+      StructField("sr_returned_date_sk", LongType),
+      StructField("sr_return_time_sk", LongType),
+      StructField("sr_item_sk", LongType),
+      StructField("sr_customer_sk", LongType),
+      StructField("sr_cdemo_sk", LongType),
+      StructField("sr_hdemo_sk", LongType),
+      StructField("sr_addr_sk", LongType),
+      StructField("sr_store_sk", LongType),
+      StructField("sr_reason_sk", LongType),
+      StructField("sr_ticket_number", LongType),
+      StructField("sr_return_quantity", LongType),
+      StructField("sr_return_amt", DoubleType),
+      StructField("sr_return_tax", DoubleType),
+      StructField("sr_return_amt_inc_tax", DoubleType),
+      StructField("sr_fee", DoubleType),
+      StructField("sr_return_ship_cost", DoubleType),
+      StructField("sr_refunded_cash", DoubleType),
+      StructField("sr_reversed_charge", DoubleType),
+      StructField("sr_store_credit", DoubleType),
+      StructField("sr_net_loss", DoubleType)
+    ))
+  }
+  private def webSalesSchema = { // target column types for "web_sales"
+    StructType(Seq(
+      StructField("ws_sold_date_sk", LongType),
+      StructField("ws_sold_time_sk", LongType),
+      StructField("ws_ship_date_sk", LongType),
+      StructField("ws_item_sk", LongType),
+      StructField("ws_bill_customer_sk", LongType),
+      StructField("ws_bill_cdemo_sk", LongType),
+      StructField("ws_bill_hdemo_sk", LongType),
+      StructField("ws_bill_addr_sk", LongType),
+      StructField("ws_ship_customer_sk", LongType),
+      StructField("ws_ship_cdemo_sk", LongType),
+      StructField("ws_ship_hdemo_sk", LongType),
+      StructField("ws_ship_addr_sk", LongType),
+      StructField("ws_web_page_sk", LongType),
+      StructField("ws_web_site_sk", LongType),
+      StructField("ws_ship_mode_sk", LongType),
+      StructField("ws_warehouse_sk", LongType),
+      StructField("ws_promo_sk", LongType),
+      StructField("ws_order_number", LongType),
+      StructField("ws_quantity", LongType),
+      StructField("ws_wholesale_cost", DoubleType),
+      StructField("ws_list_price", DoubleType),
+      StructField("ws_sales_price", DoubleType),
+      StructField("ws_ext_discount_amt", DoubleType),
+      StructField("ws_ext_sales_price", DoubleType),
+      StructField("ws_ext_wholesale_cost", DoubleType),
+      StructField("ws_ext_list_price", DoubleType),
+      StructField("ws_ext_tax", DoubleType),
+      StructField("ws_coupon_amt", DoubleType),
+      StructField("ws_ext_ship_cost", DoubleType),
+      StructField("ws_net_paid", DoubleType),
+      StructField("ws_net_paid_inc_tax", DoubleType),
+      StructField("ws_net_paid_inc_ship", DoubleType),
+      StructField("ws_net_paid_inc_ship_tax", DoubleType),
+      StructField("ws_net_profit", DoubleType)
+    ))
+  }
+  private def webReturnsSchema = {
+    StructType(Seq(
+      StructField("wr_returned_date_sk", LongType),
+      StructField("wr_returned_time_sk", LongType),
+      StructField("wr_item_sk", LongType),
+      StructField("wr_refunded_customer_sk", LongType),
+      StructField("wr_refunded_cdemo_sk", LongType),
+      StructField("wr_refunded_hdemo_sk", LongType),
+      StructField("wr_refunded_addr_sk", LongType),
+      StructField("wr_returning_customer_sk", LongType),
+      StructField("wr_returning_cdemo_sk", LongType),
+      StructField("wr_returning_hdemo_sk", LongType),
+      StructField("wr_returning_addr_sk", LongType),
+      StructField("wr_web_page_sk", LongType),
+      StructField("wr_reason_sk", LongType),
+      StructField("wr_order_number", LongType),
+      StructField("wr_return_quantity", LongType),
+      StructField("wr_return_amt", DoubleType),
+      StructField("wr_return_tax", DoubleType),
+      StructField("wr_return_amt_inc_tax", DoubleType),
+      StructField("wr_fee", DoubleType),
+      StructField("wr_return_ship_cost", DoubleType),
+      StructField("wr_refunded_cash", DoubleType),
+      StructField("wr_reversed_charge", DoubleType),
+      StructField("wr_account_credit", DoubleType),
+
StructField("wr_net_loss", DoubleType) + )) + } + private def callCenterSchema = { + StructType(Seq( + StructField("cc_call_center_sk", LongType), + StructField("cc_call_center_id", StringType), + StructField("cc_rec_start_date", DateType), + StructField("cc_rec_end_date", DateType), + StructField("cc_closed_date_sk", LongType), + StructField("cc_open_date_sk", LongType), + StructField("cc_name", StringType), + StructField("cc_class", StringType), + StructField("cc_employees", LongType), + StructField("cc_sq_ft", LongType), + StructField("cc_hours", StringType), + StructField("cc_manager", StringType), + StructField("cc_mkt_id", LongType), + StructField("cc_mkt_class", StringType), + StructField("cc_mkt_desc", StringType), + StructField("cc_market_manager", StringType), + StructField("cc_division", LongType), + StructField("cc_division_name", StringType), + StructField("cc_company", LongType), + StructField("cc_company_name", StringType), + StructField("cc_street_number", StringType), + StructField("cc_street_name", StringType), + StructField("cc_street_type", StringType), + StructField("cc_suite_number", StringType), + StructField("cc_city", StringType), + StructField("cc_county", StringType), + StructField("cc_state", StringType), + StructField("cc_zip", StringType), + StructField("cc_country", StringType), + StructField("cc_gmt_offset", DoubleType), + StructField("cc_tax_percentage", DoubleType) + )) + } + private def catalogPageSchema = { + StructType(Seq( + StructField("cp_catalog_page_sk", LongType), + StructField("cp_catalog_page_id", StringType), + StructField("cp_start_date_sk", LongType), + StructField("cp_end_date_sk", LongType), + StructField("cp_department", StringType), + StructField("cp_catalog_number", LongType), + StructField("cp_catalog_page_number", LongType), + StructField("cp_description", StringType), + StructField("cp_type", StringType) + )) + } + private def customerSchema = { + StructType(Seq( + StructField("c_customer_sk", LongType), + 
StructField("c_customer_id", StringType), + StructField("c_current_cdemo_sk", LongType), + StructField("c_current_hdemo_sk", LongType), + StructField("c_current_addr_sk", LongType), + StructField("c_first_shipto_date_sk", LongType), + StructField("c_first_sales_date_sk", LongType), + StructField("c_salutation", StringType), + StructField("c_first_name", StringType), + StructField("c_last_name", StringType), + StructField("c_preferred_cust_flag", StringType), + StructField("c_birth_day", LongType), + StructField("c_birth_month", LongType), + StructField("c_birth_year", LongType), + StructField("c_birth_country", StringType), + StructField("c_login", StringType), + StructField("c_email_address", StringType), + StructField("c_last_review_date", StringType) + )) + } + private def customerAddressSchema = { + StructType(Seq( + StructField("ca_address_sk", LongType), + StructField("ca_address_id", StringType), + StructField("ca_street_number", StringType), + StructField("ca_street_name", StringType), + StructField("ca_street_type", StringType), + StructField("ca_suite_number", StringType), + StructField("ca_city", StringType), + StructField("ca_county", StringType), + StructField("ca_state", StringType), + StructField("ca_zip", StringType), + StructField("ca_country", StringType), + StructField("ca_gmt_offset", DoubleType), + StructField("ca_location_type", StringType) + )) + } + private def customerDemographicsSchema = { + StructType(Seq( + StructField("cd_demo_sk", LongType), + StructField("cd_gender", StringType), + StructField("cd_marital_status", StringType), + StructField("cd_education_status", StringType), + StructField("cd_purchase_estimate", LongType), + StructField("cd_credit_rating", StringType), + StructField("cd_dep_count", LongType), + StructField("cd_dep_employed_count", LongType), + StructField("cd_dep_college_count", LongType) + )) + } + private def dateDimSchema = { + StructType(Seq( + StructField("d_date_sk", LongType), + StructField("d_date_id", 
StringType), + StructField("d_date", StringType), + StructField("d_month_seq", LongType), + StructField("d_week_seq", LongType), + StructField("d_quarter_seq", LongType), + StructField("d_year", LongType), + StructField("d_dow", LongType), + StructField("d_moy", LongType), + StructField("d_dom", LongType), + StructField("d_qoy", LongType), + StructField("d_fy_year", LongType), + StructField("d_fy_quarter_seq", LongType), + StructField("d_fy_week_seq", LongType), + StructField("d_day_name", StringType), + StructField("d_quarter_name", StringType), + StructField("d_holiday", StringType), + StructField("d_weekend", StringType), + StructField("d_following_holiday", StringType), + StructField("d_first_dom", LongType), + StructField("d_last_dom", LongType), + StructField("d_same_day_ly", LongType), + StructField("d_same_day_lq", LongType), + StructField("d_current_day", StringType), + StructField("d_current_week", StringType), + StructField("d_current_month", StringType), + StructField("d_current_quarter", StringType), + StructField("d_current_year", StringType) + )) + } + private def householdDemographicsSchema = { + StructType(Seq( + StructField("hd_demo_sk", LongType), + StructField("hd_income_band_sk", LongType), + StructField("hd_buy_potential", StringType), + StructField("hd_dep_count", LongType), + StructField("hd_vehicle_count", LongType) + )) + } + private def incomeBandSchema = { + StructType(Seq( + StructField("ib_income_band_sk", LongType), + StructField("ib_lower_bound", LongType), + StructField("ib_upper_bound", LongType) + )) + } + private def itemSchema = { + StructType(Seq( + StructField("i_item_sk", LongType), + StructField("i_item_id", StringType), + StructField("i_rec_start_date", StringType), + StructField("i_rec_end_date", StringType), + StructField("i_item_desc", StringType), + StructField("i_current_price", DoubleType), + StructField("i_wholesale_cost", DoubleType), + StructField("i_brand_id", LongType), + StructField("i_brand", StringType), + 
StructField("i_class_id", LongType), + StructField("i_class", StringType), + StructField("i_category_id", LongType), + StructField("i_category", StringType), + StructField("i_manufact_id", LongType), + StructField("i_manufact", StringType), + StructField("i_size", StringType), + StructField("i_formulation", StringType), + StructField("i_color", StringType), + StructField("i_units", StringType), + StructField("i_container", StringType), + StructField("i_manager_id", LongType), + StructField("i_product_name", StringType) + )) + } + private def promotionSchema = { + StructType(Seq( + StructField("p_promo_sk", LongType), + StructField("p_promo_id", StringType), + StructField("p_start_date_sk", LongType), + StructField("p_end_date_sk", LongType), + StructField("p_item_sk", LongType), + StructField("p_cost", DoubleType), + StructField("p_response_target", LongType), + StructField("p_promo_name", StringType), + StructField("p_channel_dmail", StringType), + StructField("p_channel_email", StringType), + StructField("p_channel_catalog", StringType), + StructField("p_channel_tv", StringType), + StructField("p_channel_radio", StringType), + StructField("p_channel_press", StringType), + StructField("p_channel_event", StringType), + StructField("p_channel_demo", StringType), + StructField("p_channel_details", StringType), + StructField("p_purpose", StringType), + StructField("p_discount_active", StringType) + )) + } + private def reasonSchema = { + StructType(Seq( + StructField("r_reason_sk", LongType), + StructField("r_reason_id", StringType), + StructField("r_reason_desc", StringType) + )) + } + private def shipModeSchema = { + StructType(Seq( + StructField("sm_ship_mode_sk", LongType), + StructField("sm_ship_mode_id", StringType), + StructField("sm_type", StringType), + StructField("sm_code", StringType), + StructField("sm_carrier", StringType), + StructField("sm_contract", StringType) + )) + } + private def storeSchema = { + StructType(Seq( + StructField("s_store_sk", 
LongType), + StructField("s_store_id", StringType), + StructField("s_rec_start_date", StringType), + StructField("s_rec_end_date", StringType), + StructField("s_closed_date_sk", LongType), + StructField("s_store_name", StringType), + StructField("s_number_employees", LongType), + StructField("s_floor_space", LongType), + StructField("s_hours", StringType), + StructField("s_manager", StringType), + StructField("s_market_id", LongType), + StructField("s_geography_class", StringType), + StructField("s_market_desc", StringType), + StructField("s_market_manager", StringType), + StructField("s_division_id", LongType), + StructField("s_division_name", StringType), + StructField("s_company_id", LongType), + StructField("s_company_name", StringType), + StructField("s_street_number", StringType), + StructField("s_street_name", StringType), + StructField("s_street_type", StringType), + StructField("s_suite_number", StringType), + StructField("s_city", StringType), + StructField("s_county", StringType), + StructField("s_state", StringType), + StructField("s_zip", StringType), + StructField("s_country", StringType), + StructField("s_gmt_offset", DoubleType), + StructField("s_tax_precentage", DoubleType) + )) + } + private def timeDimSchema = { + StructType(Seq( + StructField("t_time_sk", LongType), + StructField("t_time_id", StringType), + StructField("t_time", LongType), + StructField("t_hour", LongType), + StructField("t_minute", LongType), + StructField("t_second", LongType), + StructField("t_am_pm", StringType), + StructField("t_shift", StringType), + StructField("t_sub_shift", StringType), + StructField("t_meal_time", StringType) + )) + } + private def warehouseSchema = { + StructType(Seq( + StructField("w_warehouse_sk", LongType), + StructField("w_warehouse_id", StringType), + StructField("w_warehouse_name", StringType), + StructField("w_warehouse_sq_ft", LongType), + StructField("w_street_number", StringType), + StructField("w_street_name", StringType), + 
StructField("w_street_type", StringType), + StructField("w_suite_number", StringType), + StructField("w_city", StringType), + StructField("w_county", StringType), + StructField("w_state", StringType), + StructField("w_zip", StringType), + StructField("w_country", StringType), + StructField("w_gmt_offset", DoubleType) + )) + } + private def webPageSchema = { + StructType(Seq( + StructField("wp_web_page_sk", LongType), + StructField("wp_web_page_id", StringType), + StructField("wp_rec_start_date", DateType), + StructField("wp_rec_end_date", DateType), + StructField("wp_creation_date_sk", LongType), + StructField("wp_access_date_sk", LongType), + StructField("wp_autogen_flag", StringType), + StructField("wp_customer_sk", LongType), + StructField("wp_url", StringType), + StructField("wp_type", StringType), + StructField("wp_char_count", LongType), + StructField("wp_link_count", LongType), + StructField("wp_image_count", LongType), + StructField("wp_max_ad_count", LongType) + )) + } + private def webSiteSchema = { + StructType(Seq( + StructField("web_site_sk", LongType), + StructField("web_site_id", StringType), + StructField("web_rec_start_date", DateType), + StructField("web_rec_end_date", DateType), + StructField("web_name", StringType), + StructField("web_open_date_sk", LongType), + StructField("web_close_date_sk", LongType), + StructField("web_class", StringType), + StructField("web_manager", StringType), + StructField("web_mkt_id", LongType), + StructField("web_mkt_class", StringType), + StructField("web_mkt_desc", StringType), + StructField("web_market_manager", StringType), + StructField("web_company_id", LongType), + StructField("web_company_name", StringType), + StructField("web_street_number", StringType), + StructField("web_street_name", StringType), + StructField("web_street_type", StringType), + StructField("web_suite_number", StringType), + StructField("web_city", StringType), + StructField("web_county", StringType), + StructField("web_state", 
StringType), + StructField("web_zip", StringType), + StructField("web_country", StringType), + StructField("web_gmt_offset", StringType), + StructField("web_tax_percentage", DoubleType) + )) + } +} diff --git a/core/src/test/scala/com/intel/oap/tpch/GitHubActionEntries.scala b/core/src/test/scala/com/intel/oap/tpc/h/GitHubActionEntries.scala similarity index 92% rename from core/src/test/scala/com/intel/oap/tpch/GitHubActionEntries.scala rename to core/src/test/scala/com/intel/oap/tpc/h/GitHubActionEntries.scala index d2f064d00..a8b57bd3c 100644 --- a/core/src/test/scala/com/intel/oap/tpch/GitHubActionEntries.scala +++ b/core/src/test/scala/com/intel/oap/tpc/h/GitHubActionEntries.scala @@ -1,9 +1,8 @@ -package com.intel.oap.tpch +package com.intel.oap.tpc.h import java.io.File import com.intel.oap.tags.CommentOnContextPR -import com.intel.oap.tpch.TPCHSuite.stdoutLog import org.apache.commons.io.FileUtils import org.apache.commons.lang.StringUtils import org.codehaus.jackson.map.ObjectMapper @@ -26,7 +25,7 @@ class GitHubActionEntries extends FunSuite { } val repoSlug = System.getenv("GITHUB_REPOSITORY") - stdoutLog("Reading essential env variables... " + + println("Reading essential env variables... " + "Envs: GITHUB_REPOSITORY: %s" .format(repoSlug)) if (StringUtils.isEmpty(repoSlug)) { @@ -34,7 +33,7 @@ class GitHubActionEntries extends FunSuite { } val eventPath = System.getenv("PREVIOUS_EVENT_PATH") - stdoutLog("Reading essential env variables... " + + println("Reading essential env variables... 
" + "Envs: PREVIOUS_EVENT_PATH: %s" .format(eventPath)) if (StringUtils.isEmpty(eventPath)) { diff --git a/core/src/test/scala/com/intel/oap/tpch/TPCHSuite.scala b/core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala similarity index 51% rename from core/src/test/scala/com/intel/oap/tpch/TPCHSuite.scala rename to core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala index c419b300a..687c963b1 100644 --- a/core/src/test/scala/com/intel/oap/tpch/TPCHSuite.scala +++ b/core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala @@ -15,27 +15,22 @@ * limitations under the License. */ -package com.intel.oap.tpch +package com.intel.oap.tpc.h -import java.io.{File, FileOutputStream, InputStreamReader, OutputStreamWriter} +import java.io.{FileOutputStream, InputStreamReader, OutputStreamWriter} import java.lang.management.ManagementFactory -import java.nio.charset.StandardCharsets -import java.sql.Date -import java.text.SimpleDateFormat import java.util.concurrent.{Executors, ScheduledFuture, TimeUnit} import java.util.{Scanner, StringTokenizer} import com.intel.oap.tags.{BroadcastHashJoinMode, SortMergeJoinMode, TestAndWriteLogs} -import com.intel.oap.tpch.TPCHSuite.RAMMonitor -import io.prestosql.tpch._ -import org.apache.commons.io.FileUtils +import com.intel.oap.tpc.MallocUtils +import com.intel.oap.tpc.h.TPCHSuite.RAMMonitor +import com.intel.oap.tpc.util.TPCRunner import org.apache.commons.lang.StringUtils import org.apache.log4j.{Level, LogManager} import org.apache.spark.SparkConf -import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.QueryTest import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types._ -import org.apache.spark.sql.{QueryTest, Row, SaveMode} import org.knowm.xchart.BitmapEncoder.BitmapFormat import org.knowm.xchart.XYSeries.XYSeriesRenderStyle import org.knowm.xchart.style.Styler.ChartTheme @@ -46,8 +41,11 @@ import scala.collection.mutable.ArrayBuffer class TPCHSuite extends QueryTest 
with SharedSparkSession { private val MAX_DIRECT_MEMORY = "6g" + private val TPCH_QUERIES_RESOURCE = "tpch-queries-double" private val TPCH_WRITE_PATH = "/tmp/tpch-generated" + private var runner: TPCRunner = _ + override protected def sparkConf: SparkConf = { val conf = super.sparkConf conf.set("spark.memory.offHeap.size", String.valueOf(MAX_DIRECT_MEMORY)) @@ -74,309 +72,14 @@ class TPCHSuite extends QueryTest with SharedSparkSession { override def beforeAll(): Unit = { super.beforeAll() LogManager.getRootLogger.setLevel(Level.WARN) - createTPCHTables() + new TPCHTableGen(spark, 0.1D, TPCH_WRITE_PATH).gen() + runner = new TPCRunner(spark, TPCH_QUERIES_RESOURCE) } override def afterAll(): Unit = { super.afterAll() } - def createTPCHTables(): Unit = { - // gen tpc-h data - val scale = 0.1D - val dateFormat = new SimpleDateFormat("yyyy-MM-dd") - - // lineitem - def lineItemGenerator = { () => - new LineItemGenerator(scale, 1, 1) - } - - def lineItemSchema = { - StructType(Seq( - StructField("l_orderkey", LongType), - StructField("l_partkey", LongType), - StructField("l_suppkey", LongType), - StructField("l_linenumber", IntegerType), - StructField("l_quantity", LongType), - StructField("l_extendedprice", DoubleType), - StructField("l_discount", DoubleType), - StructField("l_tax", DoubleType), - StructField("l_returnflag", StringType), - StructField("l_linestatus", StringType), - StructField("l_commitdate", DateType), - StructField("l_receiptdate", DateType), - StructField("l_shipinstruct", StringType), - StructField("l_shipmode", StringType), - StructField("l_comment", StringType), - StructField("l_shipdate", DateType) - )) - } - - def lineItemParser: LineItem => Row = - lineItem => - Row( - lineItem.getOrderKey, - lineItem.getPartKey, - lineItem.getSupplierKey, - lineItem.getLineNumber, - lineItem.getQuantity, - lineItem.getExtendedPrice, - lineItem.getDiscount, - lineItem.getTax, - lineItem.getReturnFlag, - lineItem.getStatus, - 
Date.valueOf(GenerateUtils.formatDate(lineItem.getCommitDate)), - Date.valueOf(GenerateUtils.formatDate(lineItem.getReceiptDate)), - lineItem.getShipInstructions, - lineItem.getShipMode, - lineItem.getComment, - Date.valueOf(GenerateUtils.formatDate(lineItem.getShipDate)) - ) - - // customer - def customerGenerator = { () => - new CustomerGenerator(scale, 1, 1) - } - - def customerSchema = { - StructType(Seq( - StructField("c_custkey", LongType), - StructField("c_name", StringType), - StructField("c_address", StringType), - StructField("c_nationkey", LongType), - StructField("c_phone", StringType), - StructField("c_acctbal", DoubleType), - StructField("c_comment", StringType), - StructField("c_mktsegment", StringType) - )) - } - - def customerParser: Customer => Row = - customer => - Row( - customer.getCustomerKey, - customer.getName, - customer.getAddress, - customer.getNationKey, - customer.getPhone, - customer.getAccountBalance, - customer.getComment, - customer.getMarketSegment, - ) - - def rowCountOf[U](itr: java.lang.Iterable[U]): Long = { - var cnt = 0L - val iterator = itr.iterator - while (iterator.hasNext) { - iterator.next() - cnt = cnt + 1 - } - cnt - } - - // orders - def orderGenerator = { () => - new OrderGenerator(scale, 1, 1) - } - - def orderSchema = { - StructType(Seq( - StructField("o_orderkey", LongType), - StructField("o_custkey", LongType), - StructField("o_orderstatus", StringType), - StructField("o_totalprice", DoubleType), - StructField("o_orderpriority", StringType), - StructField("o_clerk", StringType), - StructField("o_shippriority", IntegerType), - StructField("o_comment", StringType), - StructField("o_orderdate", DateType) - )) - } - - def orderParser: Order => Row = - order => - Row( - order.getOrderKey, - order.getCustomerKey, - String.valueOf(order.getOrderStatus), - order.getTotalPrice, - order.getOrderPriority, - order.getClerk, - order.getShipPriority, - order.getComment, - 
Date.valueOf(GenerateUtils.formatDate(order.getOrderDate)) - ) - - // partsupp - def partSupplierGenerator = { () => - new PartSupplierGenerator(scale, 1, 1) - } - - def partSupplierSchema = { - StructType(Seq( - StructField("ps_partkey", LongType), - StructField("ps_suppkey", LongType), - StructField("ps_availqty", IntegerType), - StructField("ps_supplycost", DoubleType), - StructField("ps_comment", StringType) - )) - } - - def partSupplierParser: PartSupplier => Row = - ps => - Row( - ps.getPartKey, - ps.getSupplierKey, - ps.getAvailableQuantity, - ps.getSupplyCost, - ps.getComment - ) - - // supplier - def supplierGenerator = { () => - new SupplierGenerator(scale, 1, 1) - } - - def supplierSchema = { - StructType(Seq( - StructField("s_suppkey", LongType), - StructField("s_name", StringType), - StructField("s_address", StringType), - StructField("s_nationkey", LongType), - StructField("s_phone", StringType), - StructField("s_acctbal", DoubleType), - StructField("s_comment", StringType) - )) - } - - def supplierParser: Supplier => Row = - s => - Row( - s.getSupplierKey, - s.getName, - s.getAddress, - s.getNationKey, - s.getPhone, - s.getAccountBalance, - s.getComment - ) - - // nation - def nationGenerator = { () => - new NationGenerator() - } - - def nationSchema = { - StructType(Seq( - StructField("n_nationkey", LongType), - StructField("n_name", StringType), - StructField("n_regionkey", LongType), - StructField("n_comment", StringType) - )) - } - - def nationParser: Nation => Row = - nation => - Row( - nation.getNationKey, - nation.getName, - nation.getRegionKey, - nation.getComment - ) - - // part - def partGenerator = { () => - new PartGenerator(scale, 1, 1) - } - - def partSchema = { - StructType(Seq( - StructField("p_partkey", LongType), - StructField("p_name", StringType), - StructField("p_mfgr", StringType), - StructField("p_type", StringType), - StructField("p_size", IntegerType), - StructField("p_container", StringType), - StructField("p_retailprice", 
DoubleType), - StructField("p_comment", StringType), - StructField("p_brand", StringType) - )) - } - - def partParser: Part => Row = - part => - Row( - part.getPartKey, - part.getName, - part.getManufacturer, - part.getType, - part.getSize, - part.getContainer, - part.getRetailPrice, - part.getComment, - part.getBrand - ) - - // region - def regionGenerator = { () => - new RegionGenerator() - } - - def regionSchema = { - StructType(Seq( - StructField("r_regionkey", LongType), - StructField("r_name", StringType), - StructField("r_comment", StringType) - )) - } - - def regionParser: Region => Row = - region => - Row( - region.getRegionKey, - region.getName, - region.getComment - ) - - def generate[U](tableName: String, schema: StructType, gen: () => java.lang.Iterable[U], - parser: U => Row): Unit = { - spark.range(0, rowCountOf(gen.apply()), 1L, 1) - .mapPartitions { itr => - val lineItem = gen.apply() - val lineItemItr = lineItem.iterator() - val rows = itr.map { _ => - val item = lineItemItr.next() - parser(item) - } - rows - }(RowEncoder(schema)) - .write - .mode(SaveMode.Overwrite) - .parquet(TPCH_WRITE_PATH + File.separator + tableName) - } - - generate("lineitem", lineItemSchema, lineItemGenerator, lineItemParser) - generate("customer", customerSchema, customerGenerator, customerParser) - generate("orders", orderSchema, orderGenerator, orderParser) - generate("partsupp", partSupplierSchema, partSupplierGenerator, partSupplierParser) - generate("supplier", supplierSchema, supplierGenerator, supplierParser) - generate("nation", nationSchema, nationGenerator, nationParser) - generate("part", partSchema, partGenerator, partParser) - generate("region", regionSchema, regionGenerator, regionParser) - - - val files = new File(TPCH_WRITE_PATH).listFiles() - files.foreach(file => { - TPCHSuite.stdoutLog("Creating catalog table: " + file.getName) - spark.catalog.createTable(file.getName, file.getAbsolutePath, "arrow") - try { - 
spark.catalog.recoverPartitions(file.getName) - } catch { - case _: Throwable => - } - }) - } - test("memory usage test - broadcast hash join", TestAndWriteLogs, BroadcastHashJoinMode) { withSQLConf(("spark.sql.autoBroadcastJoinThreshold", "1TB")) { runMemoryUsageTest(comment = "BHJ") @@ -386,18 +89,18 @@ class TPCHSuite extends QueryTest with SharedSparkSession { test("memory usage test - sort merge join", TestAndWriteLogs, SortMergeJoinMode) { withSQLConf(("spark.sql.autoBroadcastJoinThreshold", "-1"), ("spark.oap.sql.columnar.sortmergejoin", "true")) { - runMemoryUsageTest(comment = "SMJ", exclusions = Array(12)) + runMemoryUsageTest(comment = "SMJ", exclusions = Array("q12")) } } - test("Q12 SMJ failure") { + test("q12 SMJ failure") { withSQLConf(("spark.sql.autoBroadcastJoinThreshold", "-1"), ("spark.oap.sql.columnar.sortmergejoin", "true")) { - runTPCHQuery(12, 1, true) + runner.runTPCQuery("q12", 1, true) } } - private def runMemoryUsageTest(exclusions: Array[Int] = Array(), comment: String = ""): Unit = { + private def runMemoryUsageTest(exclusions: Array[String] = Array[String](), comment: String = ""): Unit = { val enableTPCHTests = Option(System.getenv("ENABLE_TPCH_TESTS")) if (!enableTPCHTests.exists(_.toBoolean)) { TPCHSuite.stdoutLog("TPCH tests are not enabled, Skipping... 
") @@ -457,10 +160,10 @@ class TPCHSuite extends QueryTest with SharedSparkSession { writeCommentLine("Before suite starts: %s".format(genReportLine())) (1 to 20).foreach { executionId => writeCommentLine("Iteration %d:".format(executionId)) - (1 to 22) + runner.caseIds .filterNot(i => exclusions.toList.contains(i)) .foreach(i => { - runTPCHQuery(i, executionId) + runner.runTPCQuery(i, executionId) MallocUtils.mallocTrim() System.gc() System.gc() @@ -479,31 +182,10 @@ class TPCHSuite extends QueryTest with SharedSparkSession { ramMonitor.close() } } - - private def runTPCHQuery(caseId: Int, roundId: Int, explain: Boolean = false): Unit = { - val path = "tpch-queries/q" + caseId + ".sql"; - val absolute = TPCHSuite.locateResourcePath(path) - val sql = FileUtils.readFileToString(new File(absolute), StandardCharsets.UTF_8) - TPCHSuite.stdoutLog("Running TPC-H query %d (round %d)... ".format(caseId, roundId)) - val df = spark.sql(sql) - if (explain) { - df.explain(extended = false) - } - df.show(100) - } } object TPCHSuite { - private def locateResourcePath(resource: String): String = { - classOf[TPCHSuite].getClassLoader.getResource("") - .getPath.concat(File.separator).concat(resource) - } - - private def delete(path: String): Unit = { - FileUtils.forceDelete(new File(path)) - } - // not thread-safe class RAMMonitor() extends AutoCloseable { diff --git a/core/src/test/scala/com/intel/oap/tpc/h/TPCHTableGen.scala b/core/src/test/scala/com/intel/oap/tpc/h/TPCHTableGen.scala new file mode 100644 index 000000000..ac1f7da2d --- /dev/null +++ b/core/src/test/scala/com/intel/oap/tpc/h/TPCHTableGen.scala @@ -0,0 +1,320 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.oap.tpc.h + +import java.io.File +import java.sql.Date + +import io.trino.tpch._ +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{Row, SaveMode, SparkSession} + +class TPCHTableGen(val spark: SparkSession, scale: Double, path: String) extends Serializable { + + // lineitem + private def lineItemGenerator = { () => + new LineItemGenerator(scale, 1, 1) + } + + private def lineItemSchema = { + StructType(Seq( + StructField("l_orderkey", LongType), + StructField("l_partkey", LongType), + StructField("l_suppkey", LongType), + StructField("l_linenumber", IntegerType), + StructField("l_quantity", LongType), + StructField("l_extendedprice", DoubleType), + StructField("l_discount", DoubleType), + StructField("l_tax", DoubleType), + StructField("l_returnflag", StringType), + StructField("l_linestatus", StringType), + StructField("l_commitdate", DateType), + StructField("l_receiptdate", DateType), + StructField("l_shipinstruct", StringType), + StructField("l_shipmode", StringType), + StructField("l_comment", StringType), + StructField("l_shipdate", DateType) + )) + } + + private def lineItemParser: LineItem => Row = + lineItem => + Row( + lineItem.getOrderKey, + lineItem.getPartKey, + lineItem.getSupplierKey, + lineItem.getLineNumber, + lineItem.getQuantity, + lineItem.getExtendedPrice, + 
lineItem.getDiscount, + lineItem.getTax, + lineItem.getReturnFlag, + lineItem.getStatus, + Date.valueOf(GenerateUtils.formatDate(lineItem.getCommitDate)), + Date.valueOf(GenerateUtils.formatDate(lineItem.getReceiptDate)), + lineItem.getShipInstructions, + lineItem.getShipMode, + lineItem.getComment, + Date.valueOf(GenerateUtils.formatDate(lineItem.getShipDate)) + ) + + // customer + private def customerGenerator = { () => + new CustomerGenerator(scale, 1, 1) + } + + private def customerSchema = { + StructType(Seq( + StructField("c_custkey", LongType), + StructField("c_name", StringType), + StructField("c_address", StringType), + StructField("c_nationkey", LongType), + StructField("c_phone", StringType), + StructField("c_acctbal", DoubleType), + StructField("c_comment", StringType), + StructField("c_mktsegment", StringType) + )) + } + + private def customerParser: Customer => Row = + customer => + Row( + customer.getCustomerKey, + customer.getName, + customer.getAddress, + customer.getNationKey, + customer.getPhone, + customer.getAccountBalance, + customer.getComment, + customer.getMarketSegment, + ) + + private def rowCountOf[U](itr: java.lang.Iterable[U]): Long = { + var cnt = 0L + val iterator = itr.iterator + while (iterator.hasNext) { + iterator.next() + cnt = cnt + 1 + } + cnt + } + + // orders + private def orderGenerator = { () => + new OrderGenerator(scale, 1, 1) + } + + private def orderSchema = { + StructType(Seq( + StructField("o_orderkey", LongType), + StructField("o_custkey", LongType), + StructField("o_orderstatus", StringType), + StructField("o_totalprice", DoubleType), + StructField("o_orderpriority", StringType), + StructField("o_clerk", StringType), + StructField("o_shippriority", IntegerType), + StructField("o_comment", StringType), + StructField("o_orderdate", DateType) + )) + } + + private def orderParser: Order => Row = + order => + Row( + order.getOrderKey, + order.getCustomerKey, + String.valueOf(order.getOrderStatus), + order.getTotalPrice, 
+ order.getOrderPriority, + order.getClerk, + order.getShipPriority, + order.getComment, + Date.valueOf(GenerateUtils.formatDate(order.getOrderDate)) + ) + + // partsupp + private def partSupplierGenerator = { () => + new PartSupplierGenerator(scale, 1, 1) + } + + private def partSupplierSchema = { + StructType(Seq( + StructField("ps_partkey", LongType), + StructField("ps_suppkey", LongType), + StructField("ps_availqty", IntegerType), + StructField("ps_supplycost", DoubleType), + StructField("ps_comment", StringType) + )) + } + + private def partSupplierParser: PartSupplier => Row = + ps => + Row( + ps.getPartKey, + ps.getSupplierKey, + ps.getAvailableQuantity, + ps.getSupplyCost, + ps.getComment + ) + + // supplier + private def supplierGenerator = { () => + new SupplierGenerator(scale, 1, 1) + } + + private def supplierSchema = { + StructType(Seq( + StructField("s_suppkey", LongType), + StructField("s_name", StringType), + StructField("s_address", StringType), + StructField("s_nationkey", LongType), + StructField("s_phone", StringType), + StructField("s_acctbal", DoubleType), + StructField("s_comment", StringType) + )) + } + + private def supplierParser: Supplier => Row = + s => + Row( + s.getSupplierKey, + s.getName, + s.getAddress, + s.getNationKey, + s.getPhone, + s.getAccountBalance, + s.getComment + ) + + // nation + private def nationGenerator = { () => + new NationGenerator() + } + + private def nationSchema = { + StructType(Seq( + StructField("n_nationkey", LongType), + StructField("n_name", StringType), + StructField("n_regionkey", LongType), + StructField("n_comment", StringType) + )) + } + + private def nationParser: Nation => Row = + nation => + Row( + nation.getNationKey, + nation.getName, + nation.getRegionKey, + nation.getComment + ) + + // part + private def partGenerator = { () => + new PartGenerator(scale, 1, 1) + } + + private def partSchema = { + StructType(Seq( + StructField("p_partkey", LongType), + StructField("p_name", StringType), + 
StructField("p_mfgr", StringType), + StructField("p_type", StringType), + StructField("p_size", IntegerType), + StructField("p_container", StringType), + StructField("p_retailprice", DoubleType), + StructField("p_comment", StringType), + StructField("p_brand", StringType) + )) + } + + private def partParser: Part => Row = + part => + Row( + part.getPartKey, + part.getName, + part.getManufacturer, + part.getType, + part.getSize, + part.getContainer, + part.getRetailPrice, + part.getComment, + part.getBrand + ) + + // region + private def regionGenerator = { () => + new RegionGenerator() + } + + private def regionSchema = { + StructType(Seq( + StructField("r_regionkey", LongType), + StructField("r_name", StringType), + StructField("r_comment", StringType) + )) + } + + private def regionParser: Region => Row = + region => + Row( + region.getRegionKey, + region.getName, + region.getComment + ) + + // gen tpc-h data + private def generate[U](dir: String, tableName: String, schema: StructType, gen: () => java.lang.Iterable[U], + parser: U => Row): Unit = { + spark.range(0, rowCountOf(gen.apply()), 1L, 1) + .mapPartitions { itr => + val lineItem = gen.apply() + val lineItemItr = lineItem.iterator() + val rows = itr.map { _ => + val item = lineItemItr.next() + parser(item) + } + rows + }(RowEncoder(schema)) + .write + .mode(SaveMode.Overwrite) + .parquet(path + File.separator + tableName) + } + + def gen(): Unit = { + generate(path, "lineitem", lineItemSchema, lineItemGenerator, lineItemParser) + generate(path, "customer", customerSchema, customerGenerator, customerParser) + generate(path, "orders", orderSchema, orderGenerator, orderParser) + generate(path, "partsupp", partSupplierSchema, partSupplierGenerator, partSupplierParser) + generate(path, "supplier", supplierSchema, supplierGenerator, supplierParser) + generate(path, "nation", nationSchema, nationGenerator, nationParser) + generate(path, "part", partSchema, partGenerator, partParser) + generate(path, "region", 
regionSchema, regionGenerator, regionParser) + val files = new File(path).listFiles() + files.foreach(file => { + println("Creating catalog table: " + file.getName) + spark.catalog.createTable(file.getName, file.getAbsolutePath, "arrow") + try { + spark.catalog.recoverPartitions(file.getName) + } catch { + case _: Throwable => + } + }) + } +} diff --git a/core/src/test/scala/com/intel/oap/tpc/util/TPCRunner.scala b/core/src/test/scala/com/intel/oap/tpc/util/TPCRunner.scala new file mode 100644 index 000000000..abf9288ff --- /dev/null +++ b/core/src/test/scala/com/intel/oap/tpc/util/TPCRunner.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.intel.oap.tpc.util + +import java.io.{File, FilenameFilter} +import java.nio.charset.StandardCharsets + +import org.apache.commons.io.FileUtils +import org.apache.spark.sql.SparkSession + +class TPCRunner(val spark: SparkSession, val resource: String) { + val caseIds = TPCRunner.parseCaseIds(TPCRunner.locateResourcePath(resource), ".sql") + + def runTPCQuery(caseId: String, roundId: Int, explain: Boolean = false): Unit = { + val path = "%s/%s.sql".format(resource, caseId); + val absolute = TPCRunner.locateResourcePath(path) + val sql = FileUtils.readFileToString(new File(absolute), StandardCharsets.UTF_8) + println("Running query %s (round %d)... ".format(caseId, roundId)) + val df = spark.sql(sql) + if (explain) { + df.explain(extended = false) + } + df.show(100) + } +} + +object TPCRunner { + + private def parseCaseIds(dir: String, suffix: String): List[String] = { + val folder = new File(dir) + if (!folder.exists()) { + throw new IllegalArgumentException("dir does not exist: " + dir) + } + folder + .listFiles(new FilenameFilter { + override def accept(dir: File, name: String): Boolean = name.endsWith(suffix) + }) + .map(f => f.getName) + .map(n => n.substring(0, n.lastIndexOf(suffix))) + .toList + } + + private def locateResourcePath(resource: String): String = { + classOf[TPCRunner].getClassLoader.getResource("") + .getPath.concat(File.separator).concat(resource) + } + + private def delete(path: String): Unit = { + FileUtils.forceDelete(new File(path)) + } +} diff --git a/cpp/src/jni/jni_wrapper.cc b/cpp/src/jni/jni_wrapper.cc index e67039079..58500d13e 100644 --- a/cpp/src/jni/jni_wrapper.cc +++ b/cpp/src/jni/jni_wrapper.cc @@ -1604,14 +1604,14 @@ JNIEXPORT void JNICALL Java_com_intel_oap_vectorized_ShuffleDecompressionJniWrap } JNIEXPORT void JNICALL -Java_com_intel_oap_tpch_MallocUtils_mallocTrim(JNIEnv* env, jobject obj) { +Java_com_intel_oap_tpc_MallocUtils_mallocTrim(JNIEnv* env, jobject obj) { // malloc_stats_print(statsPrint, 
nullptr, nullptr); std::cout << "Calling malloc_trim... " << std::endl; malloc_trim(0); } JNIEXPORT void JNICALL -Java_com_intel_oap_tpch_MallocUtils_mallocStats(JNIEnv* env, jobject obj) { +Java_com_intel_oap_tpc_MallocUtils_mallocStats(JNIEnv* env, jobject obj) { // malloc_stats_print(statsPrint, nullptr, nullptr); std::cout << "Calling malloc_stats... " << std::endl; malloc_stats(); From 87b8f63f7185577d9907781271758fa647513a07 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Thu, 18 Feb 2021 10:52:30 +0800 Subject: [PATCH 2/3] CI fix --- .github/workflows/report_ram_log.yml | 2 +- .github/workflows/tpch.yml | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/report_ram_log.yml b/.github/workflows/report_ram_log.yml index 6f87167bd..8b51d68b5 100644 --- a/.github/workflows/report_ram_log.yml +++ b/.github/workflows/report_ram_log.yml @@ -62,7 +62,7 @@ jobs: - name: Run Maven tests run: | cd core/ - mvn test -B -DmembersOnlySuites=com.intel.oap.tpch -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DtagsToInclude=com.intel.oap.tags.CommentOnContextPR -Dexec.skip=true + mvn test -B -DmembersOnlySuites=com.intel.oap.tpc.h -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DtagsToInclude=com.intel.oap.tags.CommentOnContextPR -Dexec.skip=true env: MAVEN_OPTS: "-Xmx2048m" COMMENT_CONTENT_PATH: "/tmp/comment.md" diff --git a/.github/workflows/tpch.yml b/.github/workflows/tpch.yml index 3a016c125..711535ab7 100644 --- a/.github/workflows/tpch.yml +++ b/.github/workflows/tpch.yml @@ -56,7 +56,7 @@ jobs: - name: Run Maven tests - BHJ run: | cd core/ - mvn test -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DmembersOnlySuites=com.intel.oap.tpch -DtagsToInclude=com.intel.oap.tags.BroadcastHashJoinMode -DargLine="-Xmx1G -XX:MaxDirectMemorySize=500M -Dio.netty.allocator.numDirectArena=1" + mvn test -B 
-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DmembersOnlySuites=com.intel.oap.tpc.h -DtagsToInclude=com.intel.oap.tags.BroadcastHashJoinMode -DargLine="-Xmx1G -XX:MaxDirectMemorySize=500M -Dio.netty.allocator.numDirectArena=1" env: MALLOC_ARENA_MAX: "4" MAVEN_OPTS: "-Xmx1G" @@ -66,7 +66,7 @@ jobs: - name: Run Maven tests - SMJ run: | cd core/ - mvn test -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DmembersOnlySuites=com.intel.oap.tpch -DtagsToInclude=com.intel.oap.tags.SortMergeJoinMode -DargLine="-Xmx1G -XX:MaxDirectMemorySize=500M -Dio.netty.allocator.numDirectArena=1" + mvn test -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DmembersOnlySuites=com.intel.oap.tpc.h -DtagsToInclude=com.intel.oap.tags.SortMergeJoinMode -DargLine="-Xmx1G -XX:MaxDirectMemorySize=500M -Dio.netty.allocator.numDirectArena=1" env: MALLOC_ARENA_MAX: "4" MAVEN_OPTS: "-Xmx1G" @@ -86,3 +86,4 @@ jobs: with: name: pr_event path: ${{steps.output-envs.outputs.event_path}} + From e1a1fefb847c58576b57efdb92006e4a13b97298 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Thu, 18 Feb 2021 13:40:08 +0800 Subject: [PATCH 3/3] fix --- core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala | 2 +- core/src/test/scala/com/intel/oap/tpc/util/TPCRunner.scala | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala b/core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala index 687c963b1..38619329d 100644 --- a/core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala +++ b/core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala @@ -167,7 +167,7 @@ class TPCHSuite extends QueryTest with SharedSparkSession { MallocUtils.mallocTrim() System.gc() System.gc() - writeCommentLine(" Query %d: %s".format(i, genReportLine())) + writeCommentLine(" Query %s: %s".format(i, genReportLine())) 
ramMonitor.writeImage("RAM Usage History (TPC-H)" + (if (StringUtils.isEmpty(comment)) "" else " - %s".format(comment)), commentImageOutputPath) }) diff --git a/core/src/test/scala/com/intel/oap/tpc/util/TPCRunner.scala b/core/src/test/scala/com/intel/oap/tpc/util/TPCRunner.scala index abf9288ff..8b7559253 100644 --- a/core/src/test/scala/com/intel/oap/tpc/util/TPCRunner.scala +++ b/core/src/test/scala/com/intel/oap/tpc/util/TPCRunner.scala @@ -52,6 +52,10 @@ object TPCRunner { }) .map(f => f.getName) .map(n => n.substring(0, n.lastIndexOf(suffix))) + .sortBy(s => { + // fill with leading zeros + "%s%s".format(new String((0 until 16 - s.length).map(_ => '0').toArray), s) + }) .toList }