diff --git a/.github/release.yml b/.github/release.yml
index 35d67ed665e78..c63ebfc20e752 100644
--- a/.github/release.yml
+++ b/.github/release.yml
@@ -1,7 +1,5 @@
 changelog:
   exclude:
-    labels:
-      - pr-chore
     authors:
       - Mergify
   categories:
@@ -23,3 +21,6 @@ changelog:
     - title: Documentation 📔
       labels:
         - pr-doc
+    - title: Others 📒
+      labels:
+        - pr-chore
diff --git a/Cargo.lock b/Cargo.lock
index e6a4728f0e5bb..13058d4428c99 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -30,9 +30,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107" dependencies = [ "cfg-if", "getrandom 0.2.8", @@ -115,15 +115,15 @@ dependencies = [ [[package]] name = "arbitrary" -version = "1.2.3" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e90af4de65aa7b293ef2d09daff88501eb254f58edde2e1ac02c82d873eadad" +checksum = "29d47fbf90d5149a107494b15a7dc8d69b351be2db3bb9691740e88ec17fd880" [[package]] name = "arc-swap" -version = "1.6.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" +checksum = "983cd8b9d4b02a6dc6ffa557262eb5858a27a0038ffffe21a0f133eaa819a164" [[package]] name = "array-init-cursor" @@ -167,7 +167,7 @@ name = "arrow2" version = "0.16.0" source = "git+https://github.com/jorgecarleitao/arrow2?rev=615300b#615300ba2fd6490682d892bc37538df737d3e3db" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.2", "arrow-format", "base64 0.21.0", "bytemuck", @@ -311,9 +311,9 @@ dependencies = [ [[package]] name = "async-recursion" -version = "1.0.2" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b015a331cc64ebd1774ba119538573603427eaace0a1950c423ab971f903796" +checksum = "2cda8f4bcc10624c4e85bc66b3f452cca98cfa5ca002dc83a16aad2367641bea" dependencies = [ "proc-macro2", "quote", @@ -322,20 +322,19 @@ dependencies = [ [[package]] name = "async-stream" -version = "0.3.4" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad445822218ce64be7a341abfb0b1ea43b5c23aa83902542a4542e78309d8e5e" +checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" dependencies = [ "async-stream-impl", "futures-core", - "pin-project-lite", ] [[package]] name = "async-stream-impl" -version = "0.3.4" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4655ae1a7b0cdf149156f780c5bf3f1352bc53cbd9e0a361a7ef7b22947e965" +checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" dependencies = [ "proc-macro2", "quote", @@ -350,9 +349,9 @@ checksum = "7a40729d2133846d9ed0ea60a8b9541bccddab49cd30f0715a1da672fe9a2524" [[package]] name = "async-trait" -version = "0.1.64" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" +checksum = "677d1d8ab452a3936018a687b20e6f7cf5363d713b732b8884001317b0e48aa3" dependencies = [ "proc-macro2", "quote", @@ -361,9 +360,9 @@ dependencies = [ [[package]] name = "async-trait-fn" -version = "0.1.64" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0491c13085b588dbc284938fa7c221dbb4dcedb9ea9ef070da4281cea1faafc0" +checksum = "832f4936a0c4db2148cfa72a7a1950d95a750595aaa6e6cf51ee958fa19c4759" dependencies = [ "proc-macro2", "quote", @@ -381,9 +380,9 @@ dependencies = [ [[package]] name = "atomic-waker" -version = "1.1.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "debc29dde2e69f9e47506b525f639ed42300fc014a3e007832592448fa8e4599" +checksum = "065374052e7df7ee4047b1160cca5e1467a12351a40b3da123c870ba0b8eda2a" [[package]] name = "atty" @@ -410,9 +409,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "axum" -version = "0.6.7" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fb79c228270dcf2426e74864cabc94babb5dbab01a4314e702d2f16540e1591" +checksum = "08b108ad2665fa3f6e6a517c3d80ec3e77d224c47d605167aefaa5d7ef97fa48" dependencies = [ "async-trait", "axum-core", @@ -422,7 +421,7 @@ dependencies = [ "http", "http-body", "hyper", - "itoa", + "itoa 1.0.5", "matchit", "memchr", "mime", @@ -439,9 +438,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.3.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cae3e661676ffbacb30f1a824089a8c9150e71017f7e1e38f2aa32009188d34" +checksum = "79b8558f5a0581152dc94dcd289132a1d377494bdeafcd41869b3258e3e2ad92" dependencies = [ "async-trait", "bytes", @@ -655,19 +654,19 @@ dependencies = [ [[package]] name = "borsh" -version = "0.10.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40f9ca3698b2e4cb7c15571db0abc5551dca417a21ae8140460b50309bb2cc62" +checksum = "15bf3650200d8bffa99015595e10f1fbd17de07abbc25bb067da79e769939bfa" dependencies = [ "borsh-derive", - "hashbrown 0.13.2", + "hashbrown 0.11.2", ] [[package]] name = "borsh-derive" -version = "0.10.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "598b3eacc6db9c3ee57b22707ad8f6a8d2f6d442bfe24ffeb8cbb70ca59e6a35" +checksum = "6441c552f230375d18e3cc377677914d2ca2b0d36e52129fe15450a2dce46775" dependencies = [ "borsh-derive-internal", "borsh-schema-derive-internal", @@ -678,9 +677,9 @@ dependencies = [ [[package]] name = "borsh-derive-internal" -version = "0.10.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186b734fa1c9f6743e90c95d7233c9faab6360d1a96d4ffa19d9cfd1e9350f8a" +checksum = "5449c28a7b352f2d1e592a8a28bf139bc71afb0764a14f3c02500935d8c44065" dependencies = [ "proc-macro2", "quote", @@ -689,9 +688,9 @@ dependencies = [ [[package]] name = "borsh-schema-derive-internal" -version = "0.10.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99b7ff1008316626f485991b960ade129253d4034014616b94f309a15366cc49" +checksum = "cdbd5696d8bfa21d53d9fe39a714a18538bad11492a42d066dbbc395fb1951c0" dependencies = [ "proc-macro2", "quote", @@ -711,9 +710,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.3.4" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744" +checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -728,13 +727,14 @@ dependencies = [ "lazy_static", "memchr", "regex-automata", + "serde", ] [[package]] name = "bstr" -version = "1.3.0" 
+version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ffdb39cb703212f3c11973452c2861b972f757b021158f3516ba10f2fa8b2c1" +checksum = "b45ea9b00a7b3f2988e9a65ad3917e62123c38dba709b666506207be96d1790b" dependencies = [ "memchr", "once_cell", @@ -748,12 +748,6 @@ version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" -[[package]] -name = "byte-slice-cast" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c" - [[package]] name = "byte-unit" version = "4.0.18" @@ -793,18 +787,18 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" [[package]] name = "bytemuck" -version = "1.13.0" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c041d3eab048880cb0b86b256447da3f18859a163c3b8d8893f4e6368abe6393" +checksum = "aaa3a8d9a1ca92e282c96a32d6511b695d7d994d1d102ba85d279f9b2756947f" dependencies = [ "bytemuck_derive", ] [[package]] name = "bytemuck_derive" -version = "1.4.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aca418a974d83d40a0c1f0c5cba6ff4bc28d8df099109ca459a2118d40b6322" +checksum = "5fe233b960f12f8007e3db2d136e3cb1c291bfd7396e384ee76025fc1a3932b4" dependencies = [ "proc-macro2", "quote", @@ -819,9 +813,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bytes" -version = "1.4.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c" [[package]] name = "bytesize" @@ -852,9 +846,9 @@ dependencies = [ [[package]] name = "camino" -version = "1.1.3" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6031a462f977dd38968b6f23378356512feeace69cef817e1a4475108093cec3" +checksum = "88ad0e1e3e88dd237a156ab9f571021b8a158caa0ae44b1968a241efb5144c1e" dependencies = [ "serde", ] @@ -868,7 +862,7 @@ dependencies = [ "ansi_term", "anyhow", "atty", - "cargo_metadata 0.15.3", + "cargo_metadata 0.15.2", "clap 3.2.23", "csv", "getopts", @@ -903,9 +897,9 @@ dependencies = [ [[package]] name = "cargo_metadata" -version = "0.15.3" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a1ec454bc3eead8719cb56e15dbbfecdbc14e4b3a3ae4936cc6e31f5fc0d07" +checksum = "982a0cf6a99c350d7246035613882e376d58cebe571785abc5da4f648d53ac0a" dependencies = [ "camino", "cargo-platform", @@ -948,9 +942,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.79" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d" dependencies = [ "jobserver", ] @@ -1008,6 +1002,12 @@ dependencies = [ "phf_codegen", ] +[[package]] +name = "chunked_transfer" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cca491388666e04d7248af3f60f0c40cfb0991c72205595d7c396e3510207d1a" + [[package]] name = "ciborium" version = "0.2.0" @@ -1037,9 +1037,9 @@ dependencies = [ [[package]] name = "clang-sys" -version = "1.6.0" 
+version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ed9a53e5d4d9c573ae844bfac6872b159cb1d1585a83b29e7a64b7eef7332a" +checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3" dependencies = [ "glob", "libc", @@ -1065,13 +1065,13 @@ dependencies = [ [[package]] name = "clap" -version = "4.1.6" +version = "4.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0b0588d44d4d63a87dbd75c136c166bbfd9a86a31cb89e09906521c7d3f5e3" +checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39" dependencies = [ "bitflags", - "clap_derive 4.1.0", - "clap_lex 0.3.1", + "clap_derive 4.0.21", + "clap_lex 0.3.0", "is-terminal", "once_cell", "strsim", @@ -1084,7 +1084,7 @@ version = "3.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" dependencies = [ - "heck 0.4.1", + "heck 0.4.0", "proc-macro-error 1.0.4", "proc-macro2", "quote", @@ -1093,11 +1093,11 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.1.0" +version = "4.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" +checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" dependencies = [ - "heck 0.4.1", + "heck 0.4.0", "proc-macro-error 1.0.4", "proc-macro2", "quote", @@ -1115,9 +1115,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.3.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "783fe232adfca04f90f56201b26d79682d4cd2625e0bc7290b95123afe558ade" +checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" dependencies = [ "os_str_bytes", ] @@ -1186,9 +1186,9 @@ dependencies = [ [[package]] name = "comfy-table" -version = "6.1.4" +version = "6.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7b787b0dc42e8111badfdbe4c3059158ccb2db8780352fa1b01e8ccf45cc4d" +checksum = "e621e7e86c46fd8a14c32c6ae3cb95656621b4743a27d0cffedb831d46e7ad21" dependencies = [ "crossterm", "strum", @@ -1277,7 +1277,7 @@ version = "0.1.0" dependencies = [ "anyhow", "cargo-license", - "cargo_metadata 0.15.3", + "cargo_metadata 0.15.2", "git2", "tracing", "vergen", @@ -1419,7 +1419,6 @@ dependencies = [ "once_cell", "ordered-float 3.4.0", "pretty_assertions", - "primitive-types", "rand 0.8.5", "rust_decimal", "serde", @@ -1432,7 +1431,7 @@ dependencies = [ name = "common-formats" version = "0.1.0" dependencies = [ - "bstr 1.3.0", + "bstr 1.1.0", "chrono-tz", "common-arrow", "common-exception", @@ -1457,7 +1456,7 @@ version = "0.1.0" dependencies = [ "base64 0.13.1", "blake3", - "bstr 1.3.0", + "bstr 1.1.0", "bumpalo", "bytes", "chrono", @@ -1521,12 +1520,12 @@ dependencies = [ name = "common-hashtable" version = "0.1.0" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.2", "bumpalo", "cfg-if", "common-base", + "ethnum", "ordered-float 3.4.0", - "primitive-types", "rand 0.8.5", ] @@ -1740,7 +1739,6 @@ dependencies = [ "openraft", "pretty_assertions", "serde", - "serde_json", "tempfile", "tracing", ] @@ -1839,7 +1837,6 @@ dependencies = [ "async-trait", "common-exception", "common-expression", - "common-io", "futures", "petgraph", "serde", @@ -1868,7 +1865,7 @@ version = "0.1.0" dependencies = [ "async-channel", "async-trait-fn", - "bstr 1.3.0", + "bstr 1.1.0", "common-arrow", "common-base", "common-catalog", @@ -1981,7 
+1978,7 @@ dependencies = [ name = "common-sql" version = "0.1.0" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.2", "anyhow", "async-recursion", "async-trait-fn", @@ -2384,7 +2381,7 @@ dependencies = [ "common-meta-store", "common-meta-types", "jwt-simple", - "p256 0.11.1", + "p256", "parking_lot 0.12.1", "pretty_assertions", "reqwest", @@ -2396,18 +2393,18 @@ dependencies = [ [[package]] name = "concurrent-queue" -version = "2.1.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c278839b831783b70278b14df4d45e1beb1aad306c07bb796637de9a0e323e8e" +checksum = "bd7bef69dc86e3c610e4e7aed41035e2a7ed12e72dd7530f61327a6579a4390b" dependencies = [ "crossbeam-utils", ] [[package]] name = "console" -version = "0.15.5" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" +checksum = "5556015fe3aad8b968e5d4124980fbe2f6aaee7aeec6b749de1faaa2ca5d0a4c" dependencies = [ "encode_unicode", "lazy_static", @@ -2661,12 +2658,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - [[package]] name = "crypto-bigint" version = "0.4.9" @@ -2691,12 +2682,13 @@ dependencies = [ [[package]] name = "csv" -version = "1.2.0" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af91f40b7355f82b0a891f50e70399475945bb0b0da4f1700ce60761c9d3e359" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" dependencies = [ + "bstr 0.2.17", "csv-core", - "itoa", + "itoa 0.4.8", "ryu", "serde", ] @@ -2728,19 +2720,19 @@ dependencies = [ [[package]] name = "ctrlc" -version = "3.2.5" +version = "3.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbcf33c2a618cbe41ee43ae6e9f2e48368cd9f9db2896f10167d8d762679f639" +checksum = "1631ca6e3c59112501a9d87fd86f21591ff77acd31331e8a73f8d80a65bbdd71" dependencies = [ - "nix 0.26.2", - "windows-sys 0.45.0", + "nix 0.26.1", + "windows-sys 0.42.0", ] [[package]] name = "cxx" -version = "1.0.91" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d3488e7665a7a483b57e25bdd90d0aeb2bc7608c8d0346acf2ad3f1caf1d62" +checksum = "5add3fc1717409d029b20c5b6903fc0c0b02fa6741d820054f4a2efa5e5816fd" dependencies = [ "cc", "cxxbridge-flags", @@ -2750,9 +2742,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.91" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48fcaf066a053a41a81dfb14d57d99738b767febb8b735c3016e469fac5da690" +checksum = "b4c87959ba14bc6fbc61df77c3fcfe180fc32b93538c4f1031dd802ccb5f2ff0" dependencies = [ "cc", "codespan-reporting 0.11.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2765,15 +2757,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.91" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2ef98b8b717a829ca5603af80e1f9e2e48013ab227b68ef37872ef84ee479bf" +checksum = "69a3e162fde4e594ed2b07d0f83c6c67b745e7f28ce58c6df5e6b6bef99dfb59" [[package]] name = "cxxbridge-macro" -version = "1.0.91" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "086c685979a698443656e5cf7856c95c642295a38599f12fb1ff76fb28d19892" +checksum = 
"3e7e2adeb6a0d4a282e581096b06e1791532b7d576dcde5ccd9382acf55db8e6" dependencies = [ "proc-macro2", "quote", @@ -2790,7 +2782,7 @@ dependencies = [ "hashbrown 0.12.3", "lock_api", "once_cell", - "parking_lot_core 0.9.7", + "parking_lot_core 0.9.5", ] [[package]] @@ -2972,6 +2964,7 @@ dependencies = [ "common-users", "criterion", "dashmap", + "ethnum", "futures", "futures-util", "goldenfile", @@ -2992,13 +2985,12 @@ dependencies = [ "opendal", "opensrv-mysql", "ordered-float 3.4.0", - "p256 0.11.1", + "p256", "parking_lot 0.12.1", "petgraph", "pin-project-lite", "poem", "pretty_assertions", - "primitive-types", "rand 0.8.5", "regex", "reqwest", @@ -3032,7 +3024,7 @@ name = "databend-sqllogictests" version = "0.1.0" dependencies = [ "async-trait", - "clap 4.1.6", + "clap 4.0.32", "common-exception", "env_logger 0.10.0", "futures-util", @@ -3102,6 +3094,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "derive_more" version = "0.99.17" @@ -3121,6 +3124,12 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "difference" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" + [[package]] name = "difflib" version = "0.4.0" @@ -3189,18 +3198,6 @@ dependencies = [ "signature 1.6.4", ] -[[package]] -name = "ecdsa" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12844141594ad74185a926d030f3b605f6a903b4e3fec351f3ea338ac5b7637e" -dependencies = [ - "der", - "elliptic-curve", - "rfc6979", - "signature 2.0.0", -] - [[package]] name = "ed25519-compact" version = "2.0.4" @@ -3225,9 +3222,9 @@ dependencies = [ [[package]] name = "either" -version = "1.8.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" [[package]] name = "elliptic-curve" @@ -3259,9 +3256,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "encoding_rs" -version = "0.8.32" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" +checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" dependencies = [ "cfg-if", ] @@ -3281,7 +3278,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" dependencies = [ - "heck 0.4.1", + "heck 0.4.0", "proc-macro2", "quote", "syn", @@ -3289,9 +3286,9 @@ dependencies = [ [[package]] name = "enum-iterator" -version = "1.3.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ea166b3f7dc1032f7866d13f8d8e02c8d87507b61750176b86554964dc6a7bf" +checksum = "91a4ec26efacf4aeff80887a175a419493cb6f8b5480d26387eb0bd038976187" dependencies = [ "enum-iterator-derive", ] @@ -3323,9 +3320,9 @@ dependencies = [ [[package]] name = "enum_dispatch" -version = 
"0.3.11" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11f36e95862220b211a6e2aa5eca09b4fa391b13cd52ceb8035a24bf65a79de2" +checksum = "0eb359f1476bf611266ac1f5355bc14aeca37b299d0ebccc038ee7058891c9cb" dependencies = [ "once_cell", "proc-macro2", @@ -3448,9 +3445,9 @@ checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" [[package]] name = "fastrand" -version = "1.9.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" dependencies = [ "instant", ] @@ -3477,18 +3474,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "fixed-hash" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "835c052cb0c08c1acf6ffd71c022172e18723949c8282f2b9f27efbc51e64534" -dependencies = [ - "byteorder", - "rand 0.8.5", - "rustc-hex", - "static_assertions", -] - [[package]] name = "fixedbitset" version = "0.4.2" @@ -3643,6 +3628,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "fs_extra" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" + [[package]] name = "funty" version = "2.0.0" @@ -3651,9 +3642,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.26" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +checksum = "38390104763dc37a5145a53c29c63c1290b5d316d6086ec32c293f6736051bb0" dependencies = [ "futures-channel", "futures-core", @@ -3666,9 +3657,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.26" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed" dependencies = [ "futures-core", "futures-sink", @@ -3676,15 +3667,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.26" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" +checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac" [[package]] name = "futures-executor" -version = "0.3.26" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +checksum = "7acc85df6714c176ab5edf386123fafe217be88c0840ec11f199441134a074e2" dependencies = [ "futures-core", "futures-task", @@ -3693,9 +3684,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.26" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" +checksum = "00f5fb52a06bdcadeb54e8d3671f8888a39697dcb0b81b23b55174030427f4eb" [[package]] name = "futures-lite" @@ -3714,9 +3705,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.26" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" 
+checksum = "bdfb8ce053d86b91919aad980c220b1fb8401a9394410e1c289ed7e66b61835d" dependencies = [ "proc-macro2", "quote", @@ -3725,15 +3716,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.26" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" +checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9" [[package]] name = "futures-task" -version = "0.3.26" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" +checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea" [[package]] name = "futures-timer" @@ -3743,9 +3734,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.26" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6" dependencies = [ "futures-channel", "futures-core", @@ -3761,9 +3752,9 @@ dependencies = [ [[package]] name = "generator" -version = "0.7.3" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33a20a288a94683f5f4da0adecdbe095c94a77c295e514cc6484e9394dd8376e" +checksum = "d266041a359dfa931b370ef684cceb84b166beb14f7f0421f4a6a3d0c446d12e" dependencies = [ "cc", "libc", @@ -3885,9 +3876,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.27.2" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" +checksum = "dec7af912d60cdbd3677c1af9352ebae6fb8394d165568a2234df0fa00f87793" [[package]] name = "git2" @@ -3974,7 +3965,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec7971d84f2fd6b8541ad199b5c9d86e1387214636111b0483a954fc9843a66b" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.2", "auto_ops", "either", "float_eq", @@ -4020,15 +4011,6 @@ dependencies = [ "ahash 0.7.6", ] -[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" -dependencies = [ - "ahash 0.8.3", -] - [[package]] name = "hdfs-sys" version = "0.2.0" @@ -4100,7 +4082,7 @@ dependencies = [ "atomic-polyfill", "hash32", "rustc_version", - "spin 0.9.5", + "spin 0.9.4", "stable_deref_trait", ] @@ -4124,9 +4106,9 @@ dependencies = [ [[package]] name = "heck" -version = "0.4.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" [[package]] name = "hermit-abi" @@ -4146,12 +4128,6 @@ dependencies = [ "libc", ] -[[package]] -name = "hermit-abi" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" - [[package]] name = "hex" version = "0.4.3" @@ -4213,13 +4189,13 @@ dependencies = [ [[package]] name = "http" -version = "0.2.9" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" dependencies = [ "bytes", "fnv", - "itoa", + "itoa 1.0.5", ] [[package]] @@ -4280,9 +4256,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.24" +version = "0.14.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e011372fa0b68db8350aa7a248930ecc7839bf46d8485577d69f117a75f164c" +checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c" dependencies = [ "bytes", "futures-channel", @@ -4293,7 +4269,7 @@ dependencies = [ "http-body", "httparse", "httpdate", - "itoa", + "itoa 1.0.5", "pin-project-lite", "socket2", "tokio", @@ -4401,26 +4377,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "impl-codec" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba6a270039626615617f3f36d15fc827041df3b78c439da2cadfa47455a77f2f" -dependencies = [ - "parity-scale-codec", -] - -[[package]] -name = "impl-trait-for-tuples" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11d7a9f6330b71fea57921c9b61c47ee6e84f72d394754eff6163ae67e7395eb" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "indexmap" version = "1.9.2" @@ -4439,14 +4395,14 @@ checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" [[package]] name = "inferno" -version = "0.11.15" +version = "0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fb7c1b80a1dfa604bb4a649a5c5aeef3d913f7c520cb42b40e534e8a61bcdfc" +checksum = "d7207d75fcf6c1868f1390fc1c610431fe66328e9ee6813330a041ef6879eca1" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.2", + "atty", "indexmap", - "is-terminal", - "itoa", + "itoa 1.0.5", "log", "num-format", "once_cell", @@ -4482,12 +4438,12 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.5" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1abeb7a0dd0f8181267ff8adc397075586500b81b28a73e8a0208b00fc170fb3" +checksum = "46112a93252b123d31a119a8d1a1ac19deac4fac6e0e8b0df58f0d4e5870e63c" dependencies = [ "libc", - "windows-sys 0.45.0", + "windows-sys 0.42.0", ] [[package]] @@ -4504,20 +4460,20 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.7.1" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" +checksum = "11b0d96e660696543b251e58030cf9787df56da39dab19ad60eae7353040917e" [[package]] name = "is-terminal" -version = "0.4.3" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0a45d56fe973d6db23972bf5bc46f988a4a2385deac9cc29572f09daef" +checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" dependencies = [ - "hermit-abi 0.3.1", + "hermit-abi 0.2.6", "io-lifetimes", "rustix", - "windows-sys 0.45.0", + "windows-sys 0.42.0", ] [[package]] @@ -4529,6 +4485,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + [[package]] name = "itoa" version = "1.0.5" @@ -4546,9 +4508,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.61" +version = 
"0.3.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" dependencies = [ "wasm-bindgen", ] @@ -4578,9 +4540,9 @@ dependencies = [ [[package]] name = "jwt-simple" -version = "0.11.3" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a4c8e544a27e20e2fe4b82a93a9e823f01ebcc1e4e135e839db66df0e7dc54" +checksum = "529a00f2d42d7dc349c994e65917c81bf53225831a65361f6c0454124c550f63" dependencies = [ "anyhow", "binstring", @@ -4591,7 +4553,7 @@ dependencies = [ "hmac-sha256", "hmac-sha512", "k256", - "p256 0.12.0", + "p256", "p384", "rand 0.8.5", "rsa 0.7.2", @@ -4604,16 +4566,14 @@ dependencies = [ [[package]] name = "k256" -version = "0.12.0" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92a55e0ff3b72c262bcf041d9e97f1b84492b68f1c1a384de2323d3dc9403397" +checksum = "72c1e0b51e7ec0a97369623508396067a486bd0cbed95a2659a4b863d28cfc8b" dependencies = [ "cfg-if", - "ecdsa 0.15.1", + "ecdsa", "elliptic-curve", - "once_cell", "sha2", - "signature 2.0.0", ] [[package]] @@ -4760,7 +4720,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7b603516767d1ab23d0de09d023e62966c3322f7148297c35cf3d97aa8b37fa" dependencies = [ - "clap 4.1.6", + "clap 4.0.32", "termcolor", "threadpool", ] @@ -4955,9 +4915,9 @@ dependencies = [ [[package]] name = "matches" -version = "0.1.10" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "matchit" @@ -4974,6 +4934,12 @@ dependencies = [ "digest", ] +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + [[package]] name = "memchr" version = "2.5.0" @@ -4982,9 +4948,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memmap2" -version = "0.5.10" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc" dependencies = [ "libc", ] @@ -5092,14 +5058,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.6" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" +checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.45.0", + "windows-sys 0.42.0", ] [[package]] @@ -5169,7 +5135,7 @@ dependencies = [ "log", "memchr", "mime", - "spin 0.9.5", + "spin 0.9.4", "tokio", "version_check", ] @@ -5202,9 +5168,9 @@ dependencies = [ [[package]] name = "mysql_async" -version = "0.31.3" +version = "0.31.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2975442c70450b8f3a0400216321f6ab7b8bda177579f533d312ac511f913655" +checksum = "c7f9a46598da19a35a5637ee5510da39b3f07a8c53b621645e83a8959490a067" dependencies = [ "bytes", "crossbeam", @@ -5293,9 +5259,9 @@ 
dependencies = [ [[package]] name = "nix" -version = "0.26.2" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" +checksum = "46a58d1d356c6597d08cde02c2f09d785b09e28711837b1ed667dc652c08a694" dependencies = [ "bitflags", "cfg-if", @@ -5305,9 +5271,9 @@ dependencies = [ [[package]] name = "nom" -version = "7.1.3" +version = "7.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" dependencies = [ "memchr", "minimal-lexical", @@ -5327,15 +5293,6 @@ dependencies = [ "syn", ] -[[package]] -name = "nom8" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae01545c9c7fc4486ab7debaf2aad7003ac19431791868fb2e8066df97fad2f8" -dependencies = [ - "memchr", -] - [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -5406,9 +5363,9 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.3" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19" dependencies = [ "num-traits", ] @@ -5431,7 +5388,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" dependencies = [ "arrayvec 0.7.2", - "itoa", + "itoa 1.0.5", ] [[package]] @@ -5489,18 +5446,18 @@ dependencies = [ [[package]] name = "object" -version = "0.30.3" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439" +checksum = "239da7f290cfa979f43f85a8efeee9a8a76d0827c356d37f9d3d7254d6b537fb" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.17.1" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" [[package]] name = "oorandom" @@ -5737,61 +5694,22 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" dependencies = [ - "ecdsa 0.14.8", - "elliptic-curve", - "sha2", -] - -[[package]] -name = "p256" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49c124b3cbce43bcbac68c58ec181d98ed6cc7e6d0aa7c3ba97b2563410b0e55" -dependencies = [ - "ecdsa 0.15.1", + "ecdsa", "elliptic-curve", - "primeorder", "sha2", ] [[package]] name = "p384" -version = "0.12.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630a4a9b2618348ececfae61a4905f564b817063bf2d66cdfc2ced523fe1d2d4" +checksum = "dfc8c5bf642dde52bb9e87c0ecd8ca5a76faac2eeed98dedb7c717997e1080aa" dependencies = [ - "ecdsa 0.15.1", + "ecdsa", "elliptic-curve", - "primeorder", "sha2", ] -[[package]] -name = "parity-scale-codec" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "637935964ff85a605d114591d4d2c13c5d1ba2806dae97cea6bf180238a749ac" -dependencies = [ - "arrayvec 0.7.2", - "bitvec", - "byte-slice-cast", - 
"impl-trait-for-tuples", - "parity-scale-codec-derive", - "serde", -] - -[[package]] -name = "parity-scale-codec-derive" -version = "3.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b26a931f824dd4eca30b3e43bb4f31cd5f0d3a403c5f5ff27106b805bfde7b" -dependencies = [ - "proc-macro-crate 1.3.0", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "parking" version = "2.0.0" @@ -5816,7 +5734,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.7", + "parking_lot_core 0.9.5", ] [[package]] @@ -5835,15 +5753,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.7" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" +checksum = "7ff9f3fef3968a3ec5945535ed654cb38ff72d7495a25619e2247fb15a2ed9ba" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-sys 0.45.0", + "windows-sys 0.42.0", ] [[package]] @@ -5872,7 +5790,7 @@ dependencies = [ "snap", "streaming-decompression", "xxhash-rust", - "zstd 0.12.3+zstd.1.5.2", + "zstd 0.12.1+zstd.1.5.2", ] [[package]] @@ -5898,9 +5816,9 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "pem" -version = "1.1.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8" +checksum = "03c64931a1a212348ec4f3b4362585eca7159d0d09cbdf4a7f74f02173596fd4" dependencies = [ "base64 0.13.1", ] @@ -5922,9 +5840,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "petgraph" -version = "0.6.3" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" +checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" dependencies = [ "fixedbitset", "indexmap", @@ -6102,11 +6020,11 @@ dependencies = [ [[package]] name = "poem-derive" -version = "1.3.55" +version = "1.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b839bad877aa933dd00901abd127a44496130e3def48e079d60e43f2c8a33cc" +checksum = "d96859b2b0bd350f722d3b31dfb593dd33af6e8f68b8b0837026c01902e3bac2" dependencies = [ - "proc-macro-crate 1.3.0", + "proc-macro-crate 1.2.1", "proc-macro2", "quote", "syn", @@ -6169,9 +6087,9 @@ checksum = "e31bbc12f7936a7b195790dd6d9b982b66c54f45ff6766decf25c44cac302dce" [[package]] name = "predicates" -version = "2.1.5" +version = "2.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" +checksum = "f54fc5dc63ed3bbf19494623db4f3af16842c0d975818e469022d09e53f0aa05" dependencies = [ "difflib", "float-cmp", @@ -6223,39 +6141,19 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.1.23" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e97e3215779627f01ee256d2fad52f3d95e8e1c11e9fc6fd08f7cd455d5d5c78" +checksum = "2c8992a85d8e93a28bdf76137db888d3874e3b230dee5ed8bebac4c9f7617773" dependencies = [ "proc-macro2", "syn", ] -[[package]] -name = "primeorder" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0b54f7131b3dba65a2f414cf5bd25b66d4682e4608610668eae785750ba4c5b2" -dependencies = [ - "elliptic-curve", -] - -[[package]] -name = "primitive-types" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f3486ccba82358b11a77516035647c34ba167dfa53312630de83b12bd4f3d66" -dependencies = [ - "fixed-hash", - "impl-codec", - "uint", -] - [[package]] name = "priority-queue" -version = "1.3.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca9c6be70d989d21a136eb86c2d83e4b328447fac4a88dace2143c179c86267" +checksum = "d7685ca4cc0b3ad748c22ce6803e23b55b9206ef7715b965ebeaf41639238fdc" dependencies = [ "autocfg", "indexmap", @@ -6272,12 +6170,13 @@ dependencies = [ [[package]] name = "proc-macro-crate" -version = "1.3.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66618389e4ec1c7afe67d51a9bf34ff9236480f8d51e7489b7d5ab0303c13f34" +checksum = "eda0fc3b0fb7c975631757e14d9049da17374063edb6ebbcbc54d880d4fe94e9" dependencies = [ "once_cell", - "toml_edit", + "thiserror", + "toml", ] [[package]] @@ -6338,9 +6237,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.51" +version = "1.0.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" dependencies = [ "unicode-ident", ] @@ -6374,9 +6273,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.11.7" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3933d3ac2717077b3d5f42b40f59edfb1fb6a8c14e1c7de0f38075c4bac8e314" +checksum = "c01db6702aa05baa3f57dec92b8eeeeb4cb19e894e73996b32a4093289e54592" dependencies = [ "bytes", "prost-derive", @@ -6384,12 +6283,12 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.7" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a24be1d23b4552a012093e1b93697b73d644ae9590e3253d878d0e77d411b614" +checksum = "cb5320c680de74ba083512704acb90fe00f28f79207286a848e730c45dd73ed6" dependencies = [ "bytes", - "heck 0.4.1", + "heck 0.4.0", "itertools", "lazy_static", "log", @@ -6406,9 +6305,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.11.7" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9935362e8369bc3acd874caeeae814295c504c2bdbcde5c024089cf8b4dc12" +checksum = "c8842bad1a5419bca14eac663ba798f6bc19c413c2fdceb5f3ba3b0932d96720" dependencies = [ "anyhow", "itertools", @@ -6419,10 +6318,11 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.11.7" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de56acd5cc9642cac2a9518d4c8c53818905398fe42d33235859e0d542a7695" +checksum = "017f79637768cde62820bc2d4fe0e45daaa027755c323ad077767c6c5f173091" dependencies = [ + "bytes", "prost", ] @@ -6630,9 +6530,9 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "10.6.1" +version = "10.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c307f7aacdbab3f0adee67d52739a1d71112cc068d6fab169ddeb18e48877fad" +checksum = "a6823ea29436221176fe662da99998ad3b4db2c7f31e7b6f5fe43adccd6320bb" dependencies = [ "bitflags", ] @@ -6649,9 +6549,9 @@ dependencies = [ [[package]] name = 
"rayon-core" -version = "1.10.2" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" +checksum = "cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -6661,9 +6561,9 @@ dependencies = [ [[package]] name = "redis" -version = "0.22.3" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa8455fa3621f6b41c514946de66ea0531f57ca017b2e6c7cc368035ea5b46df" +checksum = "513b3649f1a111c17954296e4a3b9eecb108b766c803e2b99f179ebe27005985" dependencies = [ "arc-swap", "async-trait", @@ -6671,7 +6571,7 @@ dependencies = [ "combine", "futures", "futures-util", - "itoa", + "itoa 1.0.5", "percent-encoding", "pin-project-lite", "ryu", @@ -6703,9 +6603,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" dependencies = [ "aho-corasick", "memchr", @@ -6738,9 +6638,9 @@ dependencies = [ [[package]] name = "rend" -version = "0.4.0" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581008d2099240d37fb08d77ad713bcaec2c4d89d50b5b21a8bb1996bbab68ab" +checksum = "79af64b4b6362ffba04eef3a4e10829718a4896dac19daa741851c86781edf95" dependencies = [ "bytecheck", ] @@ -6778,11 +6678,11 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.14" +version = "0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21eed90ec8570952d53b772ecf8f206aa1ec9a3d76b2521c56c42973f2d91ee9" +checksum = "68cc60575865c7831548863cc02356512e3f1dc2f3f82cb837d7fc4cc8f3c97c" dependencies = [ - "base64 0.21.0", + "base64 0.13.1", "bytes", "encoding_rs", "futures-core", @@ -6814,7 +6714,6 @@ dependencies = [ "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams", "web-sys", "webpki-roots", "winreg", @@ -6858,9 +6757,9 @@ dependencies = [ [[package]] name = "rgb" -version = "0.8.36" +version = "0.8.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20ec2d3e3fc7a92ced357df9cebd5a10b6fb2aa1ee797bf7e9ce2f17dffc8f59" +checksum = "3603b7d71ca82644f79b5a06d1220e9a58ede60bd32255f698cb1af8838b8db3" dependencies = [ "bytemuck", ] @@ -6902,9 +6801,9 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.40" +version = "0.7.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c30f1d45d9aa61cbc8cd1eb87705470892289bb2d01943e7803b873a57404dc3" +checksum = "cec2b3485b07d96ddfd3134767b8a447b45ea4eb91448d0a35180ec0ffd5ed15" dependencies = [ "bytecheck", "hashbrown 0.12.3", @@ -6916,9 +6815,9 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.7.40" +version = "0.7.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff26ed6c7c4dfc2aa9480b86a60e3c7233543a270a680e10758a507c5a4ce476" +checksum = "6eaedadc88b53e36dd32d940ed21ae4d850d5916f2581526921f553a72ac34c4" dependencies = [ "proc-macro2", "quote", @@ -6996,9 +6895,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.28.1" +version = "1.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13cf35f7140155d02ba4ec3294373d513a3c7baa8364c162b030e33c61520a8" +checksum = 
"33c321ee4e17d2b7abe12b5d20c1231db708dd36185c8a21e9de5fed6da4dbe9" dependencies = [ "arrayvec 0.7.2", "borsh", @@ -7024,12 +6923,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" -[[package]] -name = "rustc-hex" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e75f6a532d0fd9f7f13144f392b6ad56a32696bfcd9c78f797f16bbb6f072d6" - [[package]] name = "rustc_version" version = "0.4.0" @@ -7041,23 +6934,23 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.8" +version = "0.36.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644" +checksum = "4feacf7db682c6c329c4ede12649cd36ecab0f3be5b7d74e6a20304725db4549" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.45.0", + "windows-sys 0.42.0", ] [[package]] name = "rustls" -version = "0.20.8" +version = "0.20.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" +checksum = "539a2bfe908f471bfa933876bd1eb6a19cf2176d375f82ef7f99530a40e48c2c" dependencies = [ "log", "ring", @@ -7079,11 +6972,11 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "1.0.2" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" +checksum = "0864aeff53f8c05aa08d86e5ef839d3dfcf07aeba2db32f12db0ef716e87bd55" dependencies = [ - "base64 0.21.0", + "base64 0.13.1", ] [[package]] @@ -7115,11 +7008,12 @@ checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71" [[package]] name = "schannel" -version = "0.1.21" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" +checksum = "88d6731146462ea25d9244b2ed5fd1d716d25c52e4d54aa4fb0f3c4e9854dbe2" dependencies = [ - "windows-sys 0.42.0", + "lazy_static", + "windows-sys 0.36.1", ] [[package]] @@ -7181,9 +7075,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.8.2" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a332be01508d814fed64bf28f798a146d73792121129962fdf335bb3c49a4254" +checksum = "2bc1bb97804af6631813c55739f771071e0f2ed33ee20b68c86ec505d906356c" dependencies = [ "bitflags", "core-foundation", @@ -7194,9 +7088,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.8.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31c9bb296072e961fcbd8853511dd39c2d8be2deb1e17c6860b1d30732b323b4" +checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556" dependencies = [ "core-foundation-sys", "libc", @@ -7328,9 +7222,9 @@ dependencies = [ [[package]] name = "serde-env" -version = "0.1.1" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c68119a0846249fd6f4b38561b4b4727dbc4fd9fea074f1253bca7d50440ce58" +checksum = "9f31ed60434054f2e2fceeba651f4e8ec2bb89269828323c1673a31606ac35db" dependencies = [ "anyhow", "log", @@ -7350,12 +7244,12 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.93" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" +checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" dependencies = [ "indexmap", - "itoa", + "itoa 1.0.5", "ryu", "serde", ] @@ -7389,7 +7283,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ "form_urlencoded", - "itoa", + "itoa 1.0.5", "ryu", "serde", ] @@ -7474,9 +7368,9 @@ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] name = "signal-hook" -version = "0.3.15" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" +checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" dependencies = [ "libc", "signal-hook-registry", @@ -7495,9 +7389,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ "libc", ] @@ -7599,9 +7493,9 @@ checksum = "ceb945e54128e09c43d8e4f1277851bd5044c6fc540bbaa2ad888f60b3da9ae7" [[package]] name = "slab" -version = "0.4.8" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" dependencies = [ "autocfg", ] @@ -7660,9 +7554,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "spin" -version = "0.9.5" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dccf47db1b41fa1573ed27ccf5e08e3ca771cb994f776668c5ebda893b248fc" +checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" dependencies = [ "lock_api", ] @@ -7679,22 +7573,22 @@ dependencies = [ [[package]] name = "sqllogictest" -version = "0.11.2" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71378f7ef90bc4d448f2d84c11898adca45ced916d95df16d233a0e6da39f118" +checksum = "8b2f781e2de5df40f526b3d7c93cb972f87425771a27cc8882a4178794b2ad11" dependencies = [ "async-trait", - "educe", + "derivative", + "difference", "fs-err", "futures", "glob", "humantime", "itertools", "libtest-mimic", - "md-5", + "md5", "owo-colors", "regex", - "similar", "tempfile", "thiserror", "tracing", @@ -7842,9 +7736,9 @@ dependencies = [ [[package]] name = "streaming-iterator" -version = "0.1.9" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" +checksum = "d55dd09aaa2f85ef8767cc9177294d63c30d62c8533329e75aa51d8b94976e22" [[package]] name = "streaming_algorithms" @@ -7880,7 +7774,7 @@ version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" dependencies = [ - "heck 0.4.1", + "heck 0.4.0", "proc-macro2", "quote", "rustversion", @@ -7928,9 +7822,9 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.108" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d56e159d99e6c2b93995d171050271edb50ecc5288fbc7cc17de8fdce4e58c14" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" dependencies = [ "proc-macro2", "quote", @@ -7950,9 +7844,9 @@ dependencies = [ [[package]] name = "sync_wrapper" -version = "0.1.2" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8" [[package]] name = "sys-info" @@ -7966,9 +7860,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.27.7" +version = "0.27.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "975fe381e0ecba475d4acff52466906d95b153a40324956552e027b2a9eaa89e" +checksum = "a902e9050fca0a5d6877550b769abd2bd1ce8c04634b941dbe2809735e1a1e33" dependencies = [ "cfg-if", "core-foundation-sys", @@ -8015,9 +7909,9 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.2.0" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ "winapi-util", ] @@ -8056,11 +7950,10 @@ dependencies = [ [[package]] name = "thread_local" -version = "1.1.7" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" dependencies = [ - "cfg-if", "once_cell", ] @@ -8099,11 +7992,12 @@ dependencies = [ [[package]] name = "tikv-jemalloc-sys" -version = "0.5.3+5.3.0-patched" +version = "0.5.2+5.3.0-patched" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a678df20055b43e57ef8cddde41cdfda9a3c1a060b67f4c5836dfb1d78543ba8" +checksum = "ec45c14da997d0925c7835883e4d5c181f196fa142f8c19d7643d1e9af2592c3" dependencies = [ "cc", + "fs_extra", "libc", ] @@ -8124,7 +8018,7 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" dependencies = [ - "itoa", + "itoa 1.0.5", "serde", "time-core", "time-macros", @@ -8166,15 +8060,15 @@ dependencies = [ [[package]] name = "tinyvec_macros" -version = "0.1.1" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.25.0" +version = "1.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af" +checksum = "597a12a59981d9e3c38d216785b0c37399f6e415e8d0712047620f189371b0bb" dependencies = [ "autocfg", "bytes", @@ -8225,9 +8119,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.12" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" +checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce" dependencies = [ "futures-core", "pin-project-lite", @@ -8236,9 +8130,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.7" +version = "0.7.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" +checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740" dependencies = [ "bytes", "futures-core", @@ -8250,30 +8144,13 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.11" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +checksum = "1333c76748e868a4d9d1017b5ab53171dfd095f70c712fdb4653a406547f598f" dependencies = [ "serde", ] -[[package]] -name = "toml_datetime" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4553f467ac8e3d374bc9a177a26801e5d0f9b211aa1673fb137a403afd1c9cf5" - -[[package]] -name = "toml_edit" -version = "0.18.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c59d8dd7d0dcbc6428bf7aa2f0e823e26e43b3c9aca15bbc9475d23e5fa12b" -dependencies = [ - "indexmap", - "nom8", - "toml_datetime", -] - [[package]] name = "tonic" version = "0.8.3" @@ -8556,9 +8433,9 @@ dependencies = [ [[package]] name = "try-lock" -version = "0.2.4" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" [[package]] name = "twox-hash" @@ -8573,9 +8450,9 @@ dependencies = [ [[package]] name = "typed-arena" -version = "2.0.2" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" +checksum = "0685c84d5d54d1c26f7d3eb96cd41550adb97baed141a761cf335d3d33bcd0ae" [[package]] name = "typenum" @@ -8585,9 +8462,9 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typetag" -version = "0.2.5" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eecd98403ae5ea2813689125cf5b3f99c40b8abed46c0a8945c81eadb673b31" +checksum = "63205b6a08578f24e4288dd01692d5d215b0c2e90613089187d2e9879788ed94" dependencies = [ "erased-serde", "inventory", @@ -8598,27 +8475,15 @@ dependencies = [ [[package]] name = "typetag-impl" -version = "0.2.5" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f9568611f0de5e83e0993b85c54679cd0afd659adcfcb0233f16280b980492e" +checksum = "e5fd220a6403b4959b40a7f276a8d8d52a987b010dd8efd5c923f8f2d8933132" dependencies = [ "proc-macro2", "quote", "syn", ] -[[package]] -name = "uint" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f64bba2c53b04fcab63c01a7d7427eadc821e3bc48c34dc9ba29c501164b52" -dependencies = [ - "byteorder", - "crunchy", - "hex", - "static_assertions", -] - [[package]] name = "uname" version = "0.1.1" @@ -8648,9 +8513,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.10" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" +checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" [[package]] name = "unicode-ident" @@ -8669,9 +8534,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.10.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a" [[package]] name = "unicode-width" @@ -8693,11 +8558,12 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "ureq" -version = "2.6.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "338b31dd1314f68f3aabf3ed57ab922df95ffcd902476ca7ba3c4ce7b908c46d" +checksum = "b97acb4c28a254fd7a4aeec976c46a7fa404eac4d7c134b30c75144846d7cb8f" dependencies = [ "base64 0.13.1", + "chunked_transfer", "log", "once_cell", "rustls", @@ -8727,9 +8593,9 @@ checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" [[package]] name = "uuid" -version = "1.3.0" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" +checksum = "422ee0de9031b5b948b97a8fc04e3aa35230001a722ddd27943e0be31564ce4c" dependencies = [ "getrandom 0.2.8", "serde", @@ -8834,9 +8700,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.84" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -8844,9 +8710,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.84" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" dependencies = [ "bumpalo", "log", @@ -8859,9 +8725,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.34" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d" dependencies = [ "cfg-if", "js-sys", @@ -8871,9 +8737,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.84" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -8881,9 +8747,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.84" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" dependencies = [ "proc-macro2", "quote", @@ -8894,28 +8760,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.84" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" - -[[package]] -name = "wasm-streams" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6bbae3363c08332cadccd13b67db371814cd214c2524020932f0804b8cf7c078" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] +checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" [[package]] name = "web-sys" -version = "0.3.61" +version = "0.3.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" dependencies = [ "js-sys", "wasm-bindgen", @@ -8951,9 +8804,9 @@ dependencies = [ [[package]] name = "which" -version = "4.4.0" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +checksum = "1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b" dependencies = [ "either", "libc", @@ -8999,11 +8852,28 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.44.0" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1c4bd0a50ac6020f65184721f758dba47bb9fbc2133df715ec74a237b26794a" +dependencies = [ + "windows_aarch64_msvc 0.39.0", + "windows_i686_gnu 0.39.0", + "windows_i686_msvc 0.39.0", + "windows_x86_64_gnu 0.39.0", + "windows_x86_64_msvc 0.39.0", +] + +[[package]] +name = "windows-sys" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e745dab35a0c4c77aa3ce42d595e13d2003d6902d6b08c9ef5fc326d08da12b" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows-targets", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", ] [[package]] @@ -9013,79 +8883,115 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", + "windows_aarch64_msvc 0.42.0", + "windows_i686_gnu 0.42.0", + "windows_i686_msvc 0.42.0", + "windows_x86_64_gnu 0.42.0", "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_x86_64_msvc 0.42.0", ] [[package]] -name = "windows-sys" -version = "0.45.0" +name = "windows_aarch64_gnullvm" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets", -] +checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" [[package]] -name = "windows-targets" -version = "0.42.1" +name = "windows_aarch64_msvc" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" [[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.1" +name = "windows_aarch64_msvc" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" +checksum = "ec7711666096bd4096ffa835238905bb33fb87267910e154b18b44eaabb340f2" [[package]] name = "windows_aarch64_msvc" -version = "0.42.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" +checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" [[package]] name = "windows_i686_gnu" -version = "0.42.1" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + +[[package]] +name = "windows_i686_gnu" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "763fc57100a5f7042e3057e7e8d9bdd7860d330070251a73d003563a3bb49e1b" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" + +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" [[package]] name = "windows_i686_msvc" -version = "0.42.1" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +checksum = "7bc7cbfe58828921e10a9f446fcaaf649204dcfe6c1ddd712c5eebae6bda1106" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6868c165637d653ae1e8dc4d82c25d4f97dd6605eaa8d784b5c6e0ab2a252b65" [[package]] name = "windows_x86_64_gnu" -version = "0.42.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.1" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" [[package]] name = "windows_x86_64_msvc" -version = "0.42.1" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" +checksum = "5e4d40883ae9cae962787ca76ba76390ffa29214667a111db9e0a1ad8377e809" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" [[package]] name = "winreg" @@ -9098,9 +9004,9 @@ dependencies = [ [[package]] name = "wiremock" -version = "0.5.17" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12316b50eb725e22b2f6b9c4cbede5b7b89984274d113a7440c86e5c3fc6f99b" +checksum = "249dc68542861d17eae4b4e5e8fb381c2f9e8f255a84f6771d5fdf8b6c03ce3c" dependencies = [ "assert-json-diff", "async-trait", @@ -9179,11 +9085,11 @@ dependencies = [ [[package]] name = "zstd" -version = "0.12.3+zstd.1.5.2" +version = "0.12.1+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" +checksum = "5c947d2adc84ff9a59f2e3c03b81aa4128acf28d6ad7d56273f7e8af14e47bea" dependencies = [ - "zstd-safe 6.0.4+zstd.1.5.4", + "zstd-safe 6.0.2+zstd.1.5.2", ] [[package]] @@ -9198,9 +9104,9 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "6.0.4+zstd.1.5.4" +version = "6.0.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7afb4b54b8910cf5447638cb54bf4e8a65cbedd783af98b98c62ffe91f185543" +checksum = "a6cf39f730b440bab43da8fb5faf5f254574462f73f260f85f7987f32154ff17" dependencies = [ "libc", "zstd-sys", @@ -9208,11 +9114,10 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.7+zstd.1.5.4" +version = "2.0.4+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5" +checksum = "4fa202f2ef00074143e219d15b62ffc317d17cc33909feac471c044087cad7b0" dependencies = [ "cc", "libc", - "pkg-config", ] diff --git a/README.md b/README.md index 133f2940d3027..36c29d2f02e51 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@

Databend Serverless Cloud (beta) | Documentation | - Benchmarking | + Benchmarking | Roadmap (v1.0)

@@ -63,12 +63,12 @@ Databend uses the latest techniques in vectorized query processing to allow you - __Blazing Performance__ - Databend leverages data-level parallelism(Vectorized Query Execution) and instruction-level parallelism(SIMD) technology, offering blazing performance data analytics. + Databend leverages data-level parallelism(Vectorized Query Execution) and instruction-level parallelism(SIMD) technology, offering [blazing performance](https://benchmark.clickhouse.com/) data analytics. - __Git-like MVCC Storage__ - Databend stores data with snapshots. It's easy to query, clone, and restore historical data in tables. + [Databend stores data with snapshots](https://databend.rs/doc/sql-commands/ddl/table/optimize-table#what-are-snapshot-segment-and-block), enabling users to effortlessly query, clone, or restore data from any history timepoint. - __Support for Semi-Structured Data__ @@ -78,7 +78,7 @@ Databend uses the latest techniques in vectorized query processing to allow you - __MySQL/ClickHouse Compatible__ - Databend is ANSI SQL compliant and MySQL/ClickHouse wire protocol compatible, making it easy to connect with existing tools([MySQL Client](https://databend.rs/doc/integrations/api/mysql-handler), [ClickHouse Client](https://databend.rs/doc/integrations/api/clickhouse-handler), [Vector](https://vector.dev/), [DBeaver](https://dbeaver.com/), [Jupyter](https://databend.rs/doc/integrations/gui-tool/jupyter), [JDBC](https://databend.rs/doc/develop), etc.). + Databend is ANSI SQL compliant and MySQL/ClickHouse wire protocol compatible, making it easy to connect with existing tools([MySQL Client](https://databend.rs/doc/integrations/api/mysql-handler), [ClickHouse HTTP Handler](https://databend.rs/doc/integrations/api/clickhouse-handler), [Vector](https://vector.dev/), [DBeaver](https://dbeaver.com/), [Jupyter](https://databend.rs/doc/integrations/gui-tool/jupyter), [JDBC](https://databend.rs/doc/develop), etc.). 
- __Easy to Use__ @@ -122,7 +122,7 @@ docker run --net=host datafuselabs/databend ### Connecting to Databend - [How to Connect Databend with MySQL Client](https://databend.rs/doc/integrations/api/mysql-handler) -- [How to Connect Databend with ClickHouse Client](https://databend.rs/doc/integrations/api/clickhouse-handler) +- [How to Connect Databend with ClickHouse HTTP Handler](https://databend.rs/doc/integrations/api/clickhouse-handler) - [How to Connect Databend with DBeaver SQL IDE](https://databend.rs/doc/integrations/gui-tool/dbeaver) - [How to Execute Queries in Python](https://databend.rs/doc/develop/python) - [How to Query Databend in Jupyter Notebooks](https://databend.rs/doc/integrations/gui-tool/jupyter) diff --git a/benchmark/tpch/prepare_fuse_table.sh b/benchmark/tpch/prepare_fuse_table.sh index 172c3f4d7063b..e3d87b0e91306 100644 --- a/benchmark/tpch/prepare_fuse_table.sh +++ b/benchmark/tpch/prepare_fuse_table.sh @@ -33,7 +33,7 @@ echo "CREATE TABLE IF NOT EXISTS part p_type STRING not null, p_size INTEGER not null, p_container STRING not null, - p_retailprice DOUBLE not null, + p_retailprice DECIMAL(15, 2) not null, p_comment STRING not null )" | $MYSQL_CLIENT_CONNECT @@ -44,7 +44,7 @@ echo "CREATE TABLE IF NOT EXISTS supplier s_address STRING not null, s_nationkey INTEGER not null, s_phone STRING not null, - s_acctbal DOUBLE not null, + s_acctbal DECIMAL(15, 2) not null, s_comment STRING not null )" | $MYSQL_CLIENT_CONNECT @@ -53,7 +53,7 @@ echo "CREATE TABLE IF NOT EXISTS partsupp ps_partkey BIGINT not null, ps_suppkey BIGINT not null, ps_availqty BIGINT not null, - ps_supplycost DOUBLE not null, + ps_supplycost DECIMAL(15, 2) not null, ps_comment STRING not null )" | $MYSQL_CLIENT_CONNECT @@ -64,7 +64,7 @@ echo "CREATE TABLE IF NOT EXISTS customer c_address STRING not null, c_nationkey INTEGER not null, c_phone STRING not null, - c_acctbal DOUBLE not null, + c_acctbal DECIMAL(15, 2) not null, c_mktsegment STRING not null, c_comment STRING not null )" | $MYSQL_CLIENT_CONNECT @@ -74,7 +74,7 @@ echo "CREATE TABLE IF NOT EXISTS orders o_orderkey BIGINT not null, o_custkey BIGINT not null, o_orderstatus STRING not null, - o_totalprice DOUBLE not null, + o_totalprice DECIMAL(15, 2) not null, o_orderdate DATE not null, o_orderpriority STRING not null, o_clerk STRING not null, @@ -88,10 +88,10 @@ echo "CREATE TABLE IF NOT EXISTS lineitem l_partkey BIGINT not null, l_suppkey BIGINT not null, l_linenumber BIGINT not null, - l_quantity DOUBLE not null, - l_extendedprice DOUBLE not null, - l_discount DOUBLE not null, - l_tax DOUBLE not null, + l_quantity DECIMAL(15, 2) not null, + l_extendedprice DECIMAL(15, 2) not null, + l_discount DECIMAL(15, 2) not null, + l_tax DECIMAL(15, 2) not null, l_returnflag STRING not null, l_linestatus STRING not null, l_shipdate DATE not null, diff --git a/benchmark/tpch/prepare_native_table.sh b/benchmark/tpch/prepare_native_table.sh index 85d1adc7ab946..b75e22ec1510d 100644 --- a/benchmark/tpch/prepare_native_table.sh +++ b/benchmark/tpch/prepare_native_table.sh @@ -33,7 +33,7 @@ echo "CREATE TABLE IF NOT EXISTS part p_type STRING not null, p_size INTEGER not null, p_container STRING not null, - p_retailprice DOUBLE not null, + p_retailprice DECIMAL(15, 2) not null, p_comment STRING not null ) storage_format = 'native' compression = 'lz4'" | $MYSQL_CLIENT_CONNECT @@ -44,7 +44,7 @@ echo "CREATE TABLE IF NOT EXISTS supplier s_address STRING not null, s_nationkey INTEGER not null, s_phone STRING not null, - s_acctbal DOUBLE not null, + s_acctbal 
DECIMAL(15, 2) not null, s_comment STRING not null ) storage_format = 'native' compression = 'lz4'" | $MYSQL_CLIENT_CONNECT @@ -53,7 +53,7 @@ echo "CREATE TABLE IF NOT EXISTS partsupp ps_partkey BIGINT not null, ps_suppkey BIGINT not null, ps_availqty BIGINT not null, - ps_supplycost DOUBLE not null, + ps_supplycost DECIMAL(15, 2) not null, ps_comment STRING not null ) storage_format = 'native' compression = 'lz4'" | $MYSQL_CLIENT_CONNECT @@ -64,7 +64,7 @@ echo "CREATE TABLE IF NOT EXISTS customer c_address STRING not null, c_nationkey INTEGER not null, c_phone STRING not null, - c_acctbal DOUBLE not null, + c_acctbal DECIMAL(15, 2) not null, c_mktsegment STRING not null, c_comment STRING not null )" | $MYSQL_CLIENT_CONNECT @@ -74,7 +74,7 @@ echo "CREATE TABLE IF NOT EXISTS orders o_orderkey BIGINT not null, o_custkey BIGINT not null, o_orderstatus STRING not null, - o_totalprice DOUBLE not null, + o_totalprice DECIMAL(15, 2) not null, o_orderdate DATE not null, o_orderpriority STRING not null, o_clerk STRING not null, @@ -88,10 +88,10 @@ echo "CREATE TABLE IF NOT EXISTS lineitem l_partkey BIGINT not null, l_suppkey BIGINT not null, l_linenumber BIGINT not null, - l_quantity DOUBLE not null, - l_extendedprice DOUBLE not null, - l_discount DOUBLE not null, - l_tax DOUBLE not null, + l_quantity DECIMAL(15, 2) not null, + l_extendedprice DECIMAL(15, 2) not null, + l_discount DECIMAL(15, 2) not null, + l_tax DECIMAL(15, 2) not null, l_returnflag STRING not null, l_linestatus STRING not null, l_shipdate DATE not null, diff --git a/docker/it-hive/hadoop-hive.env b/docker/it-hive/hadoop-hive.env index 965a03c6def3d..601581818d163 100644 --- a/docker/it-hive/hadoop-hive.env +++ b/docker/it-hive/hadoop-hive.env @@ -4,13 +4,13 @@ HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive HIVE_SITE_CONF_datanucleus_autoCreateSchema=false HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083 -HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false CORE_CONF_fs_defaultFS=hdfs://namenode:8020 CORE_CONF_hadoop_http_staticuser_user=root CORE_CONF_hadoop_proxyuser_hue_hosts=* CORE_CONF_hadoop_proxyuser_hue_groups=* +HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false HDFS_CONF_dfs_webhdfs_enabled=true HDFS_CONF_dfs_permissions_enabled=false @@ -27,4 +27,4 @@ YARN_CONF_yarn_resourcemanager_hostname=resourcemanager YARN_CONF_yarn_timeline___service_hostname=historyserver YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 -YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 \ No newline at end of file +YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 diff --git a/docs/doc/00-overview/index.md b/docs/doc/00-overview/index.md index 3942e10743f16..b1e6c76b20add 100644 --- a/docs/doc/00-overview/index.md +++ b/docs/doc/00-overview/index.md @@ -19,7 +19,7 @@ import TabItem from '@theme/TabItem'; - Blazing-fast data analytics on object storage. -- Leverages data-level parallelism and instruction-level parallelism technologies for optimal performance. +- Leverages data-level parallelism and instruction-level parallelism technologies for [optimal performance](https://benchmark.clickhouse.com/). - Supports Git-like MVCC storage for easy querying, cloning, and restoration of historical data. - No indexes to build, no manual tuning, and no need to figure out partitions or shard data. 
@@ -29,7 +29,7 @@ import TabItem from '@theme/TabItem'; - Compatible with MySQL / ClickHouse. - ANSI SQL compliant. -- Easy connection with existing tools such as [MySQL Client](https://databend.rs/doc/integrations/api/mysql-handler), [ClickHouse Client](https://databend.rs/doc/integrations/api/clickhouse-handler), [Vector](https://vector.dev/), [DBeaver](https://dbeaver.com/), [Jupyter](https://databend.rs/doc/integrations/gui-tool/jupyter), [JDBC](https://databend.rs/doc/develop), and more. +- Easy connection with existing tools such as [MySQL Client](https://databend.rs/doc/integrations/api/mysql-handler), [ClickHouse HTTP Handler](https://databend.rs/doc/integrations/api/clickhouse-handler), [Vector](https://vector.dev/), [DBeaver](https://dbeaver.com/), [Jupyter](https://databend.rs/doc/integrations/gui-tool/jupyter), [JDBC](https://databend.rs/doc/develop), and more. diff --git a/docs/doc/01-guides/index.md b/docs/doc/01-guides/index.md index 972f47683ecc3..08962ac5e1383 100644 --- a/docs/doc/01-guides/index.md +++ b/docs/doc/01-guides/index.md @@ -1,5 +1,5 @@ --- -title: Getting Started Tutorials +title: Get Started --- These tutorials are intended to help you get started with Databend: @@ -16,13 +16,13 @@ These tutorials are intended to help you get started with Databend: ## Connecting to Databend * [How to Connect Databend with MySQL Client](../11-integrations/00-api/01-mysql-handler.md) -* [How to Connect Databend with ClickHouse Client](../11-integrations/00-api/02-clickhouse-handler.md) +* [How to Connect Databend with ClickHouse HTTP Handler](../11-integrations/00-api/02-clickhouse-handler.md) * [How to Connect Databend with REST API](../11-integrations/00-api/00-rest.md) * [How to Connect Databend with DBeaver SQL IDE](../11-integrations/20-gui-tool/01-dbeaver.md) -* [How to Execute Queries in Python](../20-develop/01-python.md) +* [How to Execute Queries in Python](../03-develop/01-python.md) * [How to Query Databend in Jupyter Notebooks](../11-integrations/20-gui-tool/00-jupyter.md) -* [How to Execute Queries in Golang](../20-develop/00-golang.md) -* [How to Execute Queries in Node.js](../20-develop/02-nodejs.md) +* [How to Execute Queries in Golang](../03-develop/00-golang.md) +* [How to Execute Queries in Node.js](../03-develop/02-nodejs.md) ## Loading Data into Databend @@ -84,4 +84,4 @@ These tutorials are intended to help you get started with Databend: ## Performance -* [How to Benchmark Databend](../21-use-cases/01-analyze-ontime-with-databend-on-ec2-and-s3.md) \ No newline at end of file +* [How to Benchmark Databend](../21-use-cases/01-analyze-ontime-with-databend-on-ec2-and-s3.md) diff --git a/docs/doc/02-cloud/index.md b/docs/doc/02-cloud/index.md index 8825618d15a84..1695fb5a7ea85 100644 --- a/docs/doc/02-cloud/index.md +++ b/docs/doc/02-cloud/index.md @@ -1,24 +1,22 @@ --- -title: Try Databend Cloud (Beta) Free +title: Try Databend Cloud Free sidebar_label: Databend Cloud --- [Databend Cloud](https://www.databend.com) is a powerful data cloud for everyone, which is built on top of the open-source project Databend with **Simplicity**, **Elasticity**, **Security**, and **Low Cost**. 
-- [Get Started for Free](https://www.databend.com/apply) -- [Databend Cloud Documentation](https://www.databend.com/docs) -- [Architecture Overview](https://www.databend.com/docs) -- [Organizations & Users](https://www.databend.com/docs/organizations-users/manage-your-organization/) -- [Working with Warehouses](https://www.databend.com/docs/working-with-warehouses/understanding-warehouse) -- [Connecting to BI Tools](https://www.databend.com/docs/connecting-to-bi-tools/about-this-guide) -- [Developing with Databend Cloud](https://www.databend.com/docs/developing-with-databend-cloud/about-this-guide) +## Databend Cloud Architecture + +
+Databend Cloud Architecture +
## Create a Databend Cloud Account Databend Cloud is now in private beta. To create a Databend Cloud account, go to https://www.databend.com/apply to apply for beta access. -## Log in to Your Account +## Logging in to Your Account To log in to your account, go to https://app.databend.com. @@ -29,14 +27,16 @@ To log in to your account, go to https://app.databend.com. ### Warehouses -Serverless warehouses can be automatically suspended in case of no activities for a specific period. +Databend Cloud offers serverless warehouses that can be automatically suspended if there is no activity for a specific period. + +A demonstration of how Databend Cloud's warehouses work is shown below. ### Databases -This page shows a list of your databases: +This page shows a list of your databases in Databend Cloud: @@ -46,9 +46,10 @@ Worksheets is a powerful SQL editor where you can run SQL queries. For example, -### Connect to a Serverless Warehouse on Databend Cloud +### Connect Databend Cloud provides a connection string for your applications to connect to it: + ```shell @@ -60,10 +61,6 @@ Run query with curl: curl --user 'cloudapp:password' --data-binary 'SHOW TABLES' 'https://--.ch.aws-us-east-2.default.databend.com?database=default' ``` -## Community +## Databend Cloud Documentation -- [Slack](https://link.databend.rs/join-slack) (For live discussion with the Community) -- [Github](https://github.com/datafuselabs/databend) (Feature/Bug reports, Contributions) -- [Twitter](https://twitter.com/DatabendLabs) (Get the news fast) -- [Weekly](https://weekly.databend.rs/) (A weekly newsletter about Databend) -- [I'm feeling lucky](https://link.databend.rs/i-m-feeling-lucky) (Pick up a good first issue now!) \ No newline at end of file + - [Databend Cloud Documentation](https://docs.databend.com/) \ No newline at end of file diff --git a/docs/doc/03-develop/00-golang.md b/docs/doc/03-develop/00-golang.md new file mode 100644 index 0000000000000..580d6c73f5e6f --- /dev/null +++ b/docs/doc/03-develop/00-golang.md @@ -0,0 +1,164 @@ +--- +title: Developing with Databend using Golang +sidebar_label: Golang +description: + Develop with Databend using Golang. +--- + +Databend offers a driver (databend-go) written in Golang, which facilitates the development of applications using the Golang programming language and establishes connectivity with Databend. + +For installation instructions, examples, and the source code, see the GitHub [databend-go](https://github.com/databendcloud/databend-go) repo. + +In the following tutorial, you'll learn how to utilize the driver `databend-go` to develop your applications. The tutorial will walk you through creating a SQL user in Databend and then writing Golang code to create a table, insert data, and perform data queries. + +## Tutorial: Developing with Databend using Golang + +Before you start, make sure you have successfully installed Databend. For how to install Databend, see [How to deploy Databend](/doc/deploy). + +### Step 1. Prepare a SQL User Account + +To connect your program to Databend and execute SQL operations, you must provide a SQL user account with appropriate privileges in your code. Create one in Databend if needed, and ensure that the SQL user has only the necessary privileges for security. + +This tutorial uses a SQL user named 'user1' with password 'abc123' as an example. As the program will write data into Databend, the user needs ALL privileges. For how to manage SQL users and their privileges, see https://databend.rs/doc/reference/sql/ddl/user. 
+ +```sql +CREATE USER user1 IDENTIFIED BY 'abc123'; +GRANT ALL on *.* TO user1; +``` + +### Step 2. Write a Golang Program + +In this step, you'll create a simple Golang program that communicates with Databend. The program will involve tasks such as creating a table, inserting data, and executing data queries. + +1. Copy and paste the following code to the file `main.go`: + +:::note +The value of `hostname` in the code below must align with your HTTP handler settings for Databend query service. +::: + +```go title='main.go' +package main + +import ( + "database/sql" + "fmt" + "log" + + _ "github.com/databendcloud/databend-go" +) + +const ( + username = "user1" + password = "abc123" + hostname = "127.0.0.1:8000" +) + +type Book struct { + Title string + Author string + Date string +} + +func dsn() string { + return fmt.Sprintf("http://%s:%s@%s", username, password, hostname) +} + +func main() { + db, err := sql.Open("databend", dsn()) + + if err != nil { + log.Fatal(err) + } + defer db.Close() + + err = db.Ping() + if err != nil { + log.Fatal(err) + } + log.Println("Connected") + + // Create db if do not exist + dbSql := "CREATE DATABASE IF NOT EXISTS book_db" + _, err = db.Exec(dbSql) + if err != nil { + log.Fatal(err) + } + log.Println("Create database book_db success") + + // Use book_db database + _, err = db.Exec("USE book_db") + if err != nil { + log.Fatal(err) + } + + // Create table. + sql := "create table if not exists books(title VARCHAR, author VARCHAR, date VARCHAR)" + _, err = db.Exec(sql) + if err != nil { + log.Fatal(err) + } + log.Println("Create table: books") + + // Insert 1 row. + _, err = db.Exec("INSERT INTO books VALUES(?, ?, ?)", "mybook", "author", "2022") + if err != nil { + log.Fatal(err) + } + log.Println("Insert 1 row") + + // Select. + res, err := db.Query("SELECT * FROM books") + if err != nil { + log.Fatal(err) + } + + for res.Next() { + var book Book + err := res.Scan(&book.Title, &book.Author, &book.Date) + if err != nil { + log.Fatal(err) + } + + log.Printf("Select:%v", book) + } + db.Exec("drop table books") + db.Exec("drop database book_db") +} +``` + +2. Install dependencies. + +```shell +go mod init databend-golang +``` + +```text title='go.mod' +module databend-golang + +go 1.20 + +require github.com/databendcloud/databend-go v0.3.10 + +require ( + github.com/BurntSushi/toml v1.2.1 // indirect + github.com/avast/retry-go v3.0.0+incompatible // indirect + github.com/google/uuid v1.3.0 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/sirupsen/logrus v1.9.0 // indirect + golang.org/x/sys v0.5.0 // indirect +) +``` + +3. Run the program. + +```shell +go run main.go +``` + +```text title='Outputs' +2023/02/24 23:57:31 Connected +2023/02/24 23:57:31 Create database book_db success +2023/02/24 23:57:31 Create table: books +2023/02/24 23:57:31 Insert 1 row +2023/02/24 23:57:31 Select:{mybook author 2022} +``` \ No newline at end of file diff --git a/docs/doc/03-develop/01-python.md b/docs/doc/03-develop/01-python.md new file mode 100644 index 0000000000000..6f17cbf1e362e --- /dev/null +++ b/docs/doc/03-develop/01-python.md @@ -0,0 +1,115 @@ +--- +title: Developing with Databend using Python +sidebar_label: Python +description: + Develop with Databend using Python. 
+--- +import GetLatest from '@site/src/components/GetLatest'; + +Databend offers the following options enabling you to develop applications using the Python programming language and establish connectivity with Databend: + +- [databend-py](https://github.com/databendcloud/databend-py): Python driver, including support for native HTTP interfaces. +- [databend-sqlalchemy](https://github.com/databendcloud/databend-sqlalchemy): Databend SQLAlchemy dialect. + +Click the links above for their installation instructions, examples, and the source code on GitHub. + +In the following tutorial, you'll learn how to utilize the available options above to develop your applications. The tutorial will walk you through creating a SQL user in Databend and then writing Python code to create a table, insert data, and perform data queries. + +## Tutorial: Developing with Databend using Python + +Before you start, make sure you have successfully installed Databend. For how to install Databend, see [How to deploy Databend](/doc/deploy). + +### Step 1. Prepare a SQL User Account + +To connect your program to Databend and execute SQL operations, you must provide a SQL user account with appropriate privileges in your code. Create one in Databend if needed, and ensure that the SQL user has only the necessary privileges for security. + +This tutorial uses a SQL user named 'user1' with password 'abc123' as an example. As the program will write data into Databend, the user needs ALL privileges. For how to manage SQL users and their privileges, see https://databend.rs/doc/reference/sql/ddl/user. + +```sql +CREATE USER user1 IDENTIFIED BY 'abc123'; +GRANT ALL on *.* TO user1; +``` + +### Step 2. Write a Python Program + +In this step, you'll create a simple Python program that communicates with Databend. The program will involve tasks such as creating a table, inserting data, and executing data queries. + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + + + +1. Install databend-py. + +```shell +pip install databend-py +``` +2. Copy and paste the following code to the file `main.py`: + +```python title='main.py' +#!/usr/bin/env python3 + +from databend_py import Client + +client = Client('user1:abc123@127.0.0.1', port=8000, secure=False) + +# Create database, table. +client.execute("CREATE DATABASE IF NOT EXISTS book_db") +client.execute("USE book_db") +client.execute("CREATE TABLE IF NOT EXISTS books(title VARCHAR, author VARCHAR, date VARCHAR)") + +# Insert new book. +client.execute("INSERT INTO books VALUES('mybook', 'author', '2022')") + +# Query. +_, results = client.execute("SELECT * FROM books") +for (title, author, date) in results: + print("{} {} {}".format(title, author, date)) +client.execute('drop table books') +client.execute('drop database book_db') +``` + +3. Run `python main.py`: + +```text +mybook author 2022 +``` + + + + +1. Install databend-sqlalchemy. + +```shell +pip install databend-sqlalchemy +``` + +2. 
Copy and paste the following code to the file `main.py`: + +```python title='main.py' +#!/usr/bin/env python3 + +from databend_sqlalchemy import connector + +conn = connector.connect(f"http://user1:abc123@127.0.0.1:8000").cursor() +conn.execute("CREATE DATABASE IF NOT EXISTS book_db") +conn.execute("USE book_db") +conn.execute("CREATE TABLE IF NOT EXISTS books(title VARCHAR, author VARCHAR, date VARCHAR)") +conn.execute("INSERT INTO books VALUES('mybook', 'author', '2022')") +conn.execute('SELECT * FROM books') +results = conn.fetchall() +for result in results: + print(result) +conn.execute('drop table books') +conn.execute('drop database book_db') +``` + +3. Run `python main.py`: + +```text +('mybook', 'author', '2022') +``` + + + \ No newline at end of file diff --git a/docs/doc/03-develop/02-nodejs.md b/docs/doc/03-develop/02-nodejs.md new file mode 100644 index 0000000000000..16149399e25ab --- /dev/null +++ b/docs/doc/03-develop/02-nodejs.md @@ -0,0 +1,90 @@ +--- +title: Developing with Databend using Node.js +sidebar_label: Node.js +description: + Develop with Databend using Node.js. +--- + +In the following tutorial, you'll learn how to develop a Node.js application that communicates with Databend. The tutorial will walk you through creating a SQL user in Databend and then writing code to create a table, insert data, and perform data queries. + +## Tutorial: Developing with Databend using Node.js + +Before you start, make sure you have successfully installed Databend. For how to install Databend, see [How to deploy Databend](/doc/deploy). + +### Step 1. Prepare a SQL User Account + +To connect your program to Databend and execute SQL operations, you must provide a SQL user account with appropriate privileges in your code. Create one in Databend if needed, and ensure that the SQL user has only the necessary privileges for security. + +This tutorial uses a SQL user named 'user1' with password 'abc123' as an example. As the program will write data into Databend, the user needs ALL privileges. For how to manage SQL users and their privileges, see https://databend.rs/doc/reference/sql/ddl/user. + +```sql +CREATE USER user1 IDENTIFIED BY 'abc123'; +GRANT ALL on *.* TO user1; +``` + +### Step 2. Write a Node.js Program + +In this step, you'll create a simple Node.js program that communicates with Databend. The program will involve tasks such as creating a table, inserting data, and executing data queries. + +1. Install the MySQL module and add it as a dependency in your Node.js project. + +```text +npm install --save mysql +``` + +2. 
Copy and paste the following code to a file named `databend.js`: + +```js title='databend.js' +const mysql = require('mysql'); +const con = mysql.createConnection({ + host: 'localhost', + port: 3307, + user: 'user1', + password: 'abc123', +}); + +con.connect((err) => { + if (err) throw err; + console.log('Connected to Databend Server!'); + + var sql = "CREATE DATABASE IF NOT EXISTS book_db"; + con.query(sql, function (err, result) { + if (err) throw err; + console.log("Database created"); + }); + + var sql = "USE book_db"; + con.query(sql, function (err, result) { + if (err) throw err; + }); + + + var sql = "CREATE TABLE IF NOT EXISTS books(title VARCHAR, author VARCHAR, date VARCHAR)"; + con.query(sql, function (err, result) { + if (err) throw err; + console.log("Table created"); + }); + + var sql = "INSERT INTO books VALUES('mybook', 'author', '2022')"; + con.query(sql, function (err, result) { + if (err) throw err; + console.log("1 record inserted"); + }); + + con.query("SELECT * FROM books", function (err, result, fields) { + if (err) throw err; + console.log(result); + }); + + // Close the connection; end() lets the queued queries finish before quitting, so the script can exit. + con.end(); +}); +``` + +3. Run `node databend.js`: + +```text +Connected to Databend Server! +Database created +Table created +1 record inserted +[ RowDataPacket { title: 'mybook', author: 'author', date: '2022' } ] +``` \ No newline at end of file diff --git a/docs/doc/03-develop/03-java.md b/docs/doc/03-develop/03-java.md new file mode 100644 index 0000000000000..0e563171ddae7 --- /dev/null +++ b/docs/doc/03-develop/03-java.md @@ -0,0 +1,109 @@ +--- +title: Developing with Databend using Java +sidebar_label: Java +description: + Develop with Databend using Java. +--- + +Databend offers a driver (databend-jdbc) written in Java, which facilitates the development of applications using the Java programming language and establishes connectivity with Databend. + +For installation instructions, examples, and the source code, see the GitHub [databend-jdbc](https://github.com/databendcloud/databend-jdbc) repo. + +In the following tutorial, you'll learn how to utilize the driver `databend-jdbc` to develop your applications. The tutorial will walk you through creating a SQL user in Databend and then writing Java code to create a table, insert data, and perform data queries. + +## Tutorial: Developing with Databend using Java + +Before you start, make sure you have successfully installed Databend. For how to install Databend, see [How to deploy Databend](/doc/deploy). + +### Step 1. Prepare a SQL User Account + +To connect your program to Databend and execute SQL operations, you must provide a SQL user account with appropriate privileges in your code. Create one in Databend if needed, and ensure that the SQL user has only the necessary privileges for security. + +This tutorial uses a SQL user named 'user1' with password 'abc123' as an example. As the program will write data into Databend, the user needs ALL privileges. For how to manage SQL users and their privileges, see https://databend.rs/doc/reference/sql/ddl/user. + +```sql +CREATE USER user1 IDENTIFIED BY 'abc123'; +GRANT ALL on *.* TO user1; +``` + +### Step 2. Write a Java Program + +In this step, you'll create a simple Java program that communicates with Databend. The program will involve tasks such as creating a table, inserting data, and executing data queries. + +1. Declare a Maven dependency. + +```xml +<dependency> +  <groupId>com.databend</groupId> +  <artifactId>databend-jdbc</artifactId> +  <version>0.0.4</version> +</dependency> +``` + +2. 
Copy and paste the following code to a file named `demo.java`: + +```java +package com.example; + +import java.sql.*; +import java.util.Properties; + +public class demo { + static final String DB_URL = "jdbc:databend://127.0.0.1:8000"; + + public static void main(String[] args) throws Exception { + Properties properties = new Properties(); + properties.setProperty("user", "user1"); + properties.setProperty("password", "abc123"); + properties.setProperty("SSL", "false"); + + Connection conn = DriverManager.getConnection(DB_URL, properties); + + Statement stmt = conn.createStatement(); + String create_sql = "CREATE DATABASE IF NOT EXISTS book_db"; + stmt.execute(create_sql); + + String use_sql = "USE book_db"; + stmt.execute(use_sql); + + String ct_sql = "CREATE TABLE IF NOT EXISTS books(title VARCHAR, author VARCHAR, date VARCHAR)"; + stmt.execute(ct_sql); + + // Insert new book. + String title = "mybook"; + String author = "author"; + String date = "2022"; + String add_book = "INSERT INTO books (title, author, date) VALUES ('" + title + "', '" + author + "', '" + date + + "')"; + stmt.execute(add_book); + + // Select book + String sql = "SELECT * FROM books"; + stmt.execute(sql); + ResultSet rs = stmt.getResultSet(); + while (rs.next()) { + String col1 = rs.getString("title"); + String col2 = rs.getString("author"); + String col3 = rs.getString("date"); + + System.out.print("title: " + col1 + ", author: " + col2 + ", date: " + col3); + } + stmt.execute("drop table books"); + stmt.execute("drop database book_db"); + // Close conn + conn.close(); + System.exit(0); + } +} +``` + +3. Compile and run the program: + +```shell +$ mvn compile +$ mvn exec:java -D exec.mainClass="com.example.demo" +``` + +```text title='Outputs' +title: mybook, author: author, date: 2022 +``` diff --git a/docs/doc/20-develop/_category_.json b/docs/doc/03-develop/_category_.json similarity index 70% rename from docs/doc/20-develop/_category_.json rename to docs/doc/03-develop/_category_.json index 96ec310644389..7af5903757006 100644 --- a/docs/doc/20-develop/_category_.json +++ b/docs/doc/03-develop/_category_.json @@ -1,5 +1,5 @@ { - "label": "Developer Guides", + "label": "Develop", "link": { "type": "generated-index", "slug": "/develop" diff --git a/docs/doc/10-deploy/06-metasrv/15-metasrv-config.md b/docs/doc/10-deploy/06-metasrv/15-metasrv-config.md index 9d8df14b4aa73..5738368015d5c 100644 --- a/docs/doc/10-deploy/06-metasrv/15-metasrv-config.md +++ b/docs/doc/10-deploy/06-metasrv/15-metasrv-config.md @@ -56,7 +56,7 @@ join = ["127.0.0.1:28103", "127.0.0.1:28203"] ## 1. Logging config -- `log_id` is the path to a directory for storing hourly-rolling debug log. +- `log_dir` is the path to a directory for storing hourly-rolling debug log. - `log_level` is the log level. By default, it is `DEBUG`. ## 2. Admin config diff --git a/docs/doc/10-deploy/07-query/20-query-metrics.md b/docs/doc/10-deploy/07-query/20-query-metrics.md index 1af78ec313fa8..5fab808afb9d6 100644 --- a/docs/doc/10-deploy/07-query/20-query-metrics.md +++ b/docs/doc/10-deploy/07-query/20-query-metrics.md @@ -5,7 +5,7 @@ description: Databend Query Metrics --- -A `databend-query` server records metrics in table [system.metrics](../../13-sql-reference/70-system-tables/system-metrics.md). +A `databend-query` server records metrics in table [system.metrics](../../13-sql-reference/20-system-tables/system-metrics.md). 
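Since these metrics are exposed as a regular system table, they can also be inspected with plain SQL. The query below is only an illustrative sketch: it assumes a running `databend-query` node, and the metric names and values it returns depend entirely on the deployment and workload.

```sql
-- Peek at a few recorded metrics; the exact rows vary by deployment and workload.
SELECT * FROM system.metrics LIMIT 5;
```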
The [metric_api_address](../07-query/10-query-config.md) to listen on that can be scraped by Prometheus and will return a [prometheus](http://prometheus.io/docs/instrumenting/exposition_formats/) format of metrics. diff --git a/docs/doc/10-deploy/_category_.json b/docs/doc/10-deploy/_category_.json index e15737ad9e892..1e22cea138ebf 100644 --- a/docs/doc/10-deploy/_category_.json +++ b/docs/doc/10-deploy/_category_.json @@ -1,5 +1,5 @@ { - "label": "Deploy & Manage Databend", + "label": "Deployment", "link": { "type": "generated-index", "slug": "/deploy" diff --git a/docs/doc/11-integrations/00-api/02-clickhouse-handler.md b/docs/doc/11-integrations/00-api/02-clickhouse-handler.md index a73a5e3985120..5238e18f61707 100644 --- a/docs/doc/11-integrations/00-api/02-clickhouse-handler.md +++ b/docs/doc/11-integrations/00-api/02-clickhouse-handler.md @@ -2,14 +2,14 @@ title: ClickHouse Handler sidebar_label: ClickHouse Handler description: - Databend is ClickHouse wire protocol-compatible. + Databend is ClickHouse HTTP API wire protocol-compatible. --- ![image](/img/api/api-handler-clickhouse.png) ## Overview -Databend is ClickHouse wire protocol-compatible, allow you to connect to Databend server with Clickhouse client, make it easier for users/developers to use Databend. +Databend is ClickHouse HTTP API wire protocol-compatible, allowing users and developers to easily connect to Databend with ClickHouse HTTP Handler. ## ClickHouse REST API diff --git a/docs/doc/11-integrations/00-api/03-streaming-load.md b/docs/doc/11-integrations/00-api/03-streaming-load.md index af5ac5e5ceeb5..a295495d90130 100644 --- a/docs/doc/11-integrations/00-api/03-streaming-load.md +++ b/docs/doc/11-integrations/00-api/03-streaming-load.md @@ -17,7 +17,7 @@ To create a request with the Streaming Load API, follow the format below: curl -H "insert_sql:" -F "upload=@" [-F "upload=@"] -XPUT http://:[password]@:/v1/streaming_load ``` -The parameter `insert_sql` is required and must include an INSERT statement as well as the `FILE_FORMAT` parameter that specifies the file formats. For details about `FILE_FORMAT`, see [Input & Output File Formats](../../13-sql-reference/75-file-format-options.md). +The parameter `insert_sql` is required and must include an INSERT statement as well as the `FILE_FORMAT` parameter that specifies the file formats. For details about `FILE_FORMAT`, see [Input & Output File Formats](../../13-sql-reference/50-file-format-options.md). 
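For a concrete sense of the request format described above, here is a rough sketch of a Streaming Load call. The `books` table, the `user1`/`abc123` account, and the `127.0.0.1:8000` HTTP handler address are assumptions borrowed from the development tutorials elsewhere in these docs rather than values defined on this page, and the CSV file name is hypothetical.

```shell
# Hypothetical example: load a local books.csv into the books table through the Streaming Load API.
curl -H "insert_sql:insert into books file_format = (type = CSV)" \
     -F "upload=@books.csv" \
     -XPUT http://user1:abc123@127.0.0.1:8000/v1/streaming_load
```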
| Parameter | Values | Supported Formats | Examples | |-------------------------|-------------------------------------|---------------------------|---------------------------------------------------------------------------------------------------------------------------------------| diff --git a/docs/doc/11-integrations/00-api/_category_.json b/docs/doc/11-integrations/00-api/_category_.json index 4ffc1be3aebd6..3da7310914cba 100644 --- a/docs/doc/11-integrations/00-api/_category_.json +++ b/docs/doc/11-integrations/00-api/_category_.json @@ -1,5 +1,5 @@ { - "label": "APIs", + "label": "API", "link": { "type": "generated-index", "slug": "/reference/api" diff --git a/docs/doc/11-integrations/10-data-tool/_category_.json b/docs/doc/11-integrations/10-data-tool/_category_.json index 0ccb3cac911ce..d3438800897b0 100644 --- a/docs/doc/11-integrations/10-data-tool/_category_.json +++ b/docs/doc/11-integrations/10-data-tool/_category_.json @@ -1,5 +1,5 @@ { - "label": "Data Tools", + "label": "Ingest", "link": { "type": "generated-index", "slug": "/integrations/data" diff --git a/docs/doc/11-integrations/20-gui-tool/_category_.json b/docs/doc/11-integrations/20-gui-tool/_category_.json index b4c17db2f1e5f..e5bd232a0cd01 100644 --- a/docs/doc/11-integrations/20-gui-tool/_category_.json +++ b/docs/doc/11-integrations/20-gui-tool/_category_.json @@ -1,5 +1,5 @@ { - "label": "GUI Tools", + "label": "Visualize", "link": { "type": "generated-index", "slug": "/integrations/gui" diff --git a/docs/doc/12-unload-data/index.md b/docs/doc/12-unload-data/index.md index ece4a8ef73441..ddf109ab53526 100644 --- a/docs/doc/12-unload-data/index.md +++ b/docs/doc/12-unload-data/index.md @@ -7,7 +7,7 @@ Unloading data refers to the process of extracting or transferring data stored i Databend recommends using the `COPY INTO ` command to export your data to a [Stage](../14-sql-commands/00-ddl/40-stage/index.md) or an external location as a file in one of the supported formats. This command is a convenient and efficient way to transfer data out of the database and into a file for further processing or analysis. -For more information about the command, see [`COPY INTO `](../14-sql-commands/10-dml/dml-copy-into-location.md). To view the list of supported file formats that can be used to save the exported data, see [Input & Output File Formats](../13-sql-reference/75-file-format-options.md). +For more information about the command, see [`COPY INTO `](../14-sql-commands/10-dml/dml-copy-into-location.md). To view the list of supported file formats that can be used to save the exported data, see [Input & Output File Formats](../13-sql-reference/50-file-format-options.md). ## Tutorial - Unload to an External Stage diff --git a/docs/doc/13-sql-reference/10-data-types/10-data-type-numeric-types.md b/docs/doc/13-sql-reference/10-data-types/10-data-type-numeric-types.md index e25a163179256..983c64e3e15f9 100644 --- a/docs/doc/13-sql-reference/10-data-types/10-data-type-numeric-types.md +++ b/docs/doc/13-sql-reference/10-data-types/10-data-type-numeric-types.md @@ -7,12 +7,12 @@ description: Basic Numeric data type. Basic Integer Numbers data types. 
-| Name | Aliases | Storage Size | Min Value | Max Value | Description -|-----------|--------------| -------------| --------------------------- | ------------------------------- | ------- -| TINYINT | INT8 | 1 byte | -128 | 127 | -| SMALLINT | INT16 | 2 bytes | -32768 | 32767 | -| INT | INT32 | 4 bytes | -2147483648 | 2147483647 | -| BIGINT | INT64 | 8 bytes | -9223372036854775808 | 9223372036854775807 | +| Name | Aliases | Storage Size | Min Value | Max Value | Description | +|----------|---------|--------------|----------------------|---------------------|-------------| +| TINYINT | INT8 | 1 byte | -128 | 127 | | +| SMALLINT | INT16 | 2 bytes | -32768 | 32767 | | +| INT | INT32 | 4 bytes | -2147483648 | 2147483647 | | +| BIGINT | INT64 | 8 bytes | -9223372036854775808 | 9223372036854775807 | | :::tip If you want unsigned integer, please use `UNSIGNED` constraint, this is compatible with MySQL, for example: @@ -26,10 +26,10 @@ CREATE TABLE test_numeric(tiny TINYINT, tiny_unsigned TINYINT UNSIGNED) Basic Float32/Float64 data types. -| Name | Aliases | Storage Size | Min Value | Max Value | Description -|-----------|--------------| -------------| --------------------------- | ------------------------------- | ------- -| FLOAT | | 4 bytes | -3.40282347e+38 | 3.40282347e+38 | -| DOUBLE | | 8 bytes | -1.7976931348623157E+308 | 1.7976931348623157E+308 | +| Name | Aliases | Storage Size | Min Value | Max Value | Description | +|--------|---------|--------------|--------------------------|-------------------------|-------------| +| FLOAT | | 4 bytes | -3.40282347e+38 | 3.40282347e+38 | | +| DOUBLE | | 8 bytes | -1.7976931348623157E+308 | 1.7976931348623157E+308 | | ## Functions diff --git a/docs/doc/13-sql-reference/10-data-types/43-data-type-decimal-types.md b/docs/doc/13-sql-reference/10-data-types/11-data-type-decimal-types.md similarity index 58% rename from docs/doc/13-sql-reference/10-data-types/43-data-type-decimal-types.md rename to docs/doc/13-sql-reference/10-data-types/11-data-type-decimal-types.md index c93ac416c2f4e..486a32fbe7175 100644 --- a/docs/doc/13-sql-reference/10-data-types/43-data-type-decimal-types.md +++ b/docs/doc/13-sql-reference/10-data-types/11-data-type-decimal-types.md @@ -14,11 +14,29 @@ We can use `DECIMAL(P, S)` to indicate decimal types. If `P` is less than 38, the physical datatype of decimal is `Decimal128`, otherwise it's `Decimal256`. +For a DECIMAL(P, S) data type: +* The minimum value is `-10^P + 1` divided by `10^S`. +* The maximum value is `10^P - 1` divided by `10^S`. + +If you have a `DECIMAL(10, 2)` , you can store values with up to `10 digits`, with `2 digits` to the right of the decimal point. The minimum value is `-9999999.99`, and the maximum value is `9999999.99`. + ## Example ```sql -select 3::Decimal(19, 1); -- 3.0 +-- Create a table with decimal data type. +create table decimal(value decimal(36, 18)); + +-- Insert two values. +insert into decimal values(0.152587668674722117), (0.017820781941443176); + +select * from decimal; ++----------------------+ +| value | ++----------------------+ +| 0.152587668674722117 | +| 0.017820781941443176 | ++----------------------+ ``` ## Precision Inference @@ -27,16 +45,21 @@ DECIMAL has a set of complex rules for precision inference. Different rules will ### Arithmetic Operations -- Addition/Subtraction: DECIMAL(a, b) + DECIMAL(x, y) -> DECIMAL(max(a - b, x - y) + max(b, y) + 1, max(b, y)), which means both integer and decimal parts use the larger value of the two operands. 
+- Addition/Subtraction: `DECIMAL(a, b) + DECIMAL(x, y) -> DECIMAL(max(a - b, x - y) + max(b, y) + 1, max(b, y))`, which means both integer and decimal parts use the larger value of the two operands. + +- Multiplication: `DECIMAL(a, b) * DECIMAL(x, y) -> DECIMAL(a + x, b + y)`. + +- Division: `DECIMAL(a, b) / DECIMAL(x, y) -> DECIMAL(a + y, b)`. -- Multiplication: DECIMAL(a, b) * DECIMAL(x, y) -> DECIMAL(a + x, b + y). +### Comparison Operations -- Division: DECIMAL(a, b) / DECIMAL(x, y) -> DECIMAL(a + y, b). +- Decimal can be compared with other numeric types. +- Decimal can be compared with other decimal types. ### Aggregate Operations -- SUM: SUM(DECIMAL(a, b)) -> DECIMAL(MAX, b) -- AVG: AVG(DECIMAL(a, b)) -> DECIMAL(MAX, max(b, 4)) +- SUM: `SUM(DECIMAL(a, b)) -> DECIMAL(MAX, b)` +- AVG: `AVG(DECIMAL(a, b)) -> DECIMAL(MAX, max(b, 4))` where `MAX` is 38 for decimal128 and 76 for decimal256. diff --git a/docs/doc/13-sql-reference/10-data-types/index.md b/docs/doc/13-sql-reference/10-data-types/index.md index ab1d001cec92b..beac260821f40 100644 --- a/docs/doc/13-sql-reference/10-data-types/index.md +++ b/docs/doc/13-sql-reference/10-data-types/index.md @@ -7,36 +7,34 @@ slug: ./ Databend supports SQL data types in several categories: * [Boolean Data Types](00-data-type-logical-types.md) * [Numeric Data Types](10-data-type-numeric-types.md) +* [Decimal Data Types](11-data-type-decimal-types.md) * [Date & Time Data Types](20-data-type-time-date-types.md) * [String Data Types](30-data-type-string-types.md) -* [Decimal Data Types](43-data-type-decimal-types.md) * [Array(T) Data Types](40-data-type-array-types.md) * [Tuple Data Types](41-data-type-tuple-types.md) * [Semi-structured Data Types](42-data-type-semi-structured-types.md) ## General-Purpose Data Types -| Name | Aliases | Storage Size | Min Value | Max Value | Description -|-----------|--------------| -------------| --------------------------- | -------------------------------| ------- -| BOOLEAN | BOOL | 1 byte | | | Logical boolean (true/false) -| TINYINT | INT8 | 1 byte | -128 | 127 | -| SMALLINT | INT16 | 2 bytes | -32768 | 32767 | -| INT | INT32 | 4 bytes | -2147483648 | 2147483647 | -| BIGINT | INT64 | 8 bytes | -9223372036854775808 | 9223372036854775807 | -| FLOAT | | 4 bytes | -3.40282347e+38 | 3.40282347e+38 | -| DOUBLE | | 8 bytes | -1.7976931348623157E+308 | 1.7976931348623157E+308 | -| DATE | | 4 bytes | 1000-01-01 | 9999-12-31 | YYYY-MM-DD -| TIMESTAMP | | 8 bytes | 0001-01-01 00:00:00 | 9999-12-31 23:59:59.999999 UTC | YYYY-MM-DD hh:mm:ss[.fraction], up to microseconds (6 digits) precision -| VARCHAR | STRING | variable | | | - +| Name | Aliases | Storage Size | Min Value | Max Value | Description | +|---------------|---------|--------------|--------------------------|--------------------------------|-------------------------------------------------------------------------| +| **BOOLEAN** | BOOL | 1 byte | | | Logical boolean (true/false) | +| **TINYINT** | INT8 | 1 byte | -128 | 127 | | +| **SMALLINT** | INT16 | 2 bytes | -32768 | 32767 | | +| **INT** | INT32 | 4 bytes | -2147483648 | 2147483647 | | +| **BIGINT** | INT64 | 8 bytes | -9223372036854775808 | 9223372036854775807 | | +| **FLOAT** | | 4 bytes | -3.40282347e+38 | 3.40282347e+38 | | +| **DOUBLE** | | 8 bytes | -1.7976931348623157E+308 | 1.7976931348623157E+308 | | +| **DECIMAL** | | 16/32 bytes | -10^P / 10^S | 10^P / 10^S | | +| **DATE** | | 4 bytes | 1000-01-01 | 9999-12-31 | YYYY-MM-DD | +| **TIMESTAMP** | | 8 bytes | 0001-01-01 00:00:00 | 9999-12-31 
23:59:59.999999 UTC | YYYY-MM-DD hh:mm:ss[.fraction], up to microseconds (6 digits) precision | +| **VARCHAR** | STRING | variable | | | | +| **ARRAY** | | | | | [1,2,3] | +| **TUPLE** | | | | | ('2023-02-14 08:00:00','Valentine's Day') | ## Semi-structured Data Types -Databend supports three Semi-structured types: ARRAY, OBJECT and VARIANT. - -| Name | Aliases | Build From Values | Description -|---------|--------------|-------------------------|---------------- -| ARRAY | | [1,2,3] | Zero-based indexed list, each value can have difference data type. -| TUPLE | | ('2023-02-14 08:00:00','Valentine's Day') | Collection of ordered,immmutable, which requires the type of each element to be declared before being used. -| VARIANT | JSON | [1,{"a":1,"b":{"c":2}}] | Collection of elements of different data types., including ARRAY and OBJECT. +| Name | Aliases | Build From Values | Description | +|-------------|---------|-------------------------------------------|-------------------------------------------------------------------------------------------------------------| +| **VARIANT** | JSON | [1,{"a":1,"b":{"c":2}}] | Collection of elements of different data types., including ARRAY and OBJECT. | diff --git a/docs/doc/13-sql-reference/70-system-tables/_category_.json b/docs/doc/13-sql-reference/20-system-tables/_category_.json similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/_category_.json rename to docs/doc/13-sql-reference/20-system-tables/_category_.json diff --git a/docs/doc/13-sql-reference/70-system-tables/system-build-options.md b/docs/doc/13-sql-reference/20-system-tables/system-build-options.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-build-options.md rename to docs/doc/13-sql-reference/20-system-tables/system-build-options.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-clusters.md b/docs/doc/13-sql-reference/20-system-tables/system-clusters.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-clusters.md rename to docs/doc/13-sql-reference/20-system-tables/system-clusters.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-columns.md b/docs/doc/13-sql-reference/20-system-tables/system-columns.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-columns.md rename to docs/doc/13-sql-reference/20-system-tables/system-columns.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-configs.md b/docs/doc/13-sql-reference/20-system-tables/system-configs.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-configs.md rename to docs/doc/13-sql-reference/20-system-tables/system-configs.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-contributors.md b/docs/doc/13-sql-reference/20-system-tables/system-contributors.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-contributors.md rename to docs/doc/13-sql-reference/20-system-tables/system-contributors.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-credits.md b/docs/doc/13-sql-reference/20-system-tables/system-credits.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-credits.md rename to docs/doc/13-sql-reference/20-system-tables/system-credits.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-functions.md b/docs/doc/13-sql-reference/20-system-tables/system-functions.md similarity index 100% rename from 
docs/doc/13-sql-reference/70-system-tables/system-functions.md rename to docs/doc/13-sql-reference/20-system-tables/system-functions.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-metrics.md b/docs/doc/13-sql-reference/20-system-tables/system-metrics.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-metrics.md rename to docs/doc/13-sql-reference/20-system-tables/system-metrics.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-numbers.md b/docs/doc/13-sql-reference/20-system-tables/system-numbers.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-numbers.md rename to docs/doc/13-sql-reference/20-system-tables/system-numbers.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-query-log.md b/docs/doc/13-sql-reference/20-system-tables/system-query-log.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-query-log.md rename to docs/doc/13-sql-reference/20-system-tables/system-query-log.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-settings.md b/docs/doc/13-sql-reference/20-system-tables/system-settings.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-settings.md rename to docs/doc/13-sql-reference/20-system-tables/system-settings.md diff --git a/docs/doc/13-sql-reference/70-system-tables/system-tracing.md b/docs/doc/13-sql-reference/20-system-tables/system-tracing.md similarity index 100% rename from docs/doc/13-sql-reference/70-system-tables/system-tracing.md rename to docs/doc/13-sql-reference/20-system-tables/system-tracing.md diff --git a/docs/doc/16-table-engines/00-fuse.md b/docs/doc/13-sql-reference/30-table-engines/00-fuse.md similarity index 97% rename from docs/doc/16-table-engines/00-fuse.md rename to docs/doc/13-sql-reference/30-table-engines/00-fuse.md index c1eba5f82a7b8..f2aa0cc223d45 100644 --- a/docs/doc/16-table-engines/00-fuse.md +++ b/docs/doc/13-sql-reference/30-table-engines/00-fuse.md @@ -16,7 +16,7 @@ CREATE TABLE table_name ( ) [ENGINE = Fuse] [CLUSTER BY( [, , ...] )] [options]; ``` -Read more about the create table statement in [ddl-create-table](../14-sql-commands/00-ddl/20-table/10-ddl-create-table.md) +Read more about the create table statement in [ddl-create-table](../../14-sql-commands/00-ddl/20-table/10-ddl-create-table.md) ### Default engine diff --git a/docs/doc/16-table-engines/01-memory.md b/docs/doc/13-sql-reference/30-table-engines/01-memory.md similarity index 100% rename from docs/doc/16-table-engines/01-memory.md rename to docs/doc/13-sql-reference/30-table-engines/01-memory.md diff --git a/docs/doc/16-table-engines/_category_.json b/docs/doc/13-sql-reference/30-table-engines/_category_.json similarity index 100% rename from docs/doc/16-table-engines/_category_.json rename to docs/doc/13-sql-reference/30-table-engines/_category_.json diff --git a/docs/doc/13-sql-reference/20-sql-identifiers.md b/docs/doc/13-sql-reference/40-sql-identifiers.md similarity index 96% rename from docs/doc/13-sql-reference/20-sql-identifiers.md rename to docs/doc/13-sql-reference/40-sql-identifiers.md index c71e6aa0b5ae7..aea13d27c39af 100644 --- a/docs/doc/13-sql-reference/20-sql-identifiers.md +++ b/docs/doc/13-sql-reference/40-sql-identifiers.md @@ -78,7 +78,7 @@ By default, Databend applies the following rules for storing identifiers (at cre * When an identifier is double-quoted, it is stored and resolved exactly as entered, including case. 
-If want to preserve the case of characters when use `unquoted identifier`, need set [unquoted_ident_case_sensitive](70-system-tables/system-settings.md) = 1. +If want to preserve the case of characters when use `unquoted identifier`, need set [unquoted_ident_case_sensitive](20-system-tables/system-settings.md) = 1. Examples: @@ -106,7 +106,7 @@ databend :) desc tt; ``` -If do not want to preserve the case of characters when use `double identifier`, need set [quoted_ident_case_sensitive](70-system-tables/system-settings.md) = 0. +If do not want to preserve the case of characters when use `double identifier`, need set [quoted_ident_case_sensitive](20-system-tables/system-settings.md) = 0. Examples: diff --git a/docs/doc/13-sql-reference/75-file-format-options.md b/docs/doc/13-sql-reference/50-file-format-options.md similarity index 100% rename from docs/doc/13-sql-reference/75-file-format-options.md rename to docs/doc/13-sql-reference/50-file-format-options.md diff --git a/docs/doc/14-sql-commands/00-ddl/100-file-format/01-ddl-create-file-format.md b/docs/doc/14-sql-commands/00-ddl/100-file-format/01-ddl-create-file-format.md index a7641a1ca987f..30cad0db02dc4 100644 --- a/docs/doc/14-sql-commands/00-ddl/100-file-format/01-ddl-create-file-format.md +++ b/docs/doc/14-sql-commands/00-ddl/100-file-format/01-ddl-create-file-format.md @@ -10,7 +10,7 @@ Creates a named file format. CREATE FILE FORMAT [ IF NOT EXISTS ] FileFormatOptions ``` -For details about `FileFormatOptions`, see [Input & Output File Formats](../../../13-sql-reference/75-file-format-options.md). +For details about `FileFormatOptions`, see [Input & Output File Formats](../../../13-sql-reference/50-file-format-options.md). ## Examples diff --git a/docs/doc/14-sql-commands/00-ddl/40-stage/01-ddl-create-stage.md b/docs/doc/14-sql-commands/00-ddl/40-stage/01-ddl-create-stage.md index 6ca11214967c7..1b457b45e063f 100644 --- a/docs/doc/14-sql-commands/00-ddl/40-stage/01-ddl-create-stage.md +++ b/docs/doc/14-sql-commands/00-ddl/40-stage/01-ddl-create-stage.md @@ -120,7 +120,7 @@ externalLocation ::= ### formatTypeOptions -For details about `FILE_FORMAT`, see [Input & Output File Formats](../../../13-sql-reference/75-file-format-options.md). +For details about `FILE_FORMAT`, see [Input & Output File Formats](../../../13-sql-reference/50-file-format-options.md). 
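Since the `formatTypeOptions` subsection now points to the relocated file-format reference (50-file-format-options.md), a short illustration may help readers of this hunk. This is a sketch under assumptions: the stage name and the specific options shown are examples of `FileFormatOptions`, not part of this patch.

```sql
-- Hypothetical stage that declares its file format inline; TYPE,
-- FIELD_DELIMITER and SKIP_HEADER are options documented in the
-- file-format reference this section links to.
CREATE STAGE my_csv_stage
  FILE_FORMAT = (TYPE = CSV FIELD_DELIMITER = ',' SKIP_HEADER = 1);
```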
### copyOptions ``` @@ -146,7 +146,7 @@ CREATE STAGE my_internal_stage; ### External Stages ```sql -CREATE STAGE my_s3_stage url='s3://load/files/' connection=(aws_key_id='1a2b3c' aws_secret_key='4x5y6z'); +CREATE STAGE my_s3_stage URL='s3://load/files/' CONNECTION = (ACCESS_KEY_ID = '' SECRET_ACCESS_KEY = ''); ``` ```sql @@ -156,4 +156,4 @@ DESC STAGE my_s3_stage; +-------------+------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------+---------+ | my_s3_stage | External | StageParams { storage: S3(StageS3Storage { bucket: "load", path: "/files/", credentials_aws_key_id: "", credentials_aws_secret_key: "", encryption_master_key: "" }) } | CopyOptions { on_error: None, size_limit: 0 } | FileFormatOptions { format: Csv, skip_header: 0, field_delimiter: ",", record_delimiter: "\n", compression: None } | | +-------------+------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------+---------+ -``` \ No newline at end of file +``` diff --git a/docs/doc/14-sql-commands/10-dml/dml-copy-into-location.md b/docs/doc/14-sql-commands/10-dml/dml-copy-into-location.md index 76420945a1827..56b77deb1670f 100644 --- a/docs/doc/14-sql-commands/10-dml/dml-copy-into-location.md +++ b/docs/doc/14-sql-commands/10-dml/dml-copy-into-location.md @@ -56,7 +56,7 @@ externalLocation (for Amazon S3) ::= ### FILE_FORMAT -See [Input & Output File Formats](../../13-sql-reference/75-file-format-options.md). +See [Input & Output File Formats](../../13-sql-reference/50-file-format-options.md). ### copyOptions ``` diff --git a/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md b/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md index 9e6ee6ee42e5c..a01d987060b61 100644 --- a/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md +++ b/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md @@ -186,7 +186,7 @@ A [PCRE2](https://www.pcre.org/current/doc/html/)-based regular expression patte ### FILE_FORMAT -See [Input & Output File Formats](../../13-sql-reference/75-file-format-options.md). +See [Input & Output File Formats](../../13-sql-reference/50-file-format-options.md). ### copyOptions diff --git a/docs/doc/14-sql-commands/40-show/show-metrics.md b/docs/doc/14-sql-commands/40-show/show-metrics.md index 05ede3a8385bb..93e9c253d7af8 100644 --- a/docs/doc/14-sql-commands/40-show/show-metrics.md +++ b/docs/doc/14-sql-commands/40-show/show-metrics.md @@ -2,7 +2,7 @@ title: SHOW METRICS --- -Shows the list of [system metrics](../../13-sql-reference/70-system-tables/system-metrics.md). +Shows the list of [system metrics](../../13-sql-reference/20-system-tables/system-metrics.md). 
## Syntax diff --git a/docs/doc/14-sql-commands/40-show/show-settings.md b/docs/doc/14-sql-commands/40-show/show-settings.md index 4421a7cc668ff..ce3772d75539a 100644 --- a/docs/doc/14-sql-commands/40-show/show-settings.md +++ b/docs/doc/14-sql-commands/40-show/show-settings.md @@ -2,7 +2,7 @@ title: SHOW SETTINGS --- -Shows the databend's [system settings](../../13-sql-reference/70-system-tables/system-settings.md). +Shows the databend's [system settings](../../13-sql-reference/20-system-tables/system-settings.md). You can change it by set command, like `set max_threads = 1`. diff --git a/docs/doc/15-sql-functions/112-table-functions/stage_table_function.md b/docs/doc/15-sql-functions/112-table-functions/stage_table_function.md index 8464edf4fd03d..d390cabe0b496 100644 --- a/docs/doc/15-sql-functions/112-table-functions/stage_table_function.md +++ b/docs/doc/15-sql-functions/112-table-functions/stage_table_function.md @@ -32,7 +32,7 @@ The function parameters are as follows: `` should be one of the following: -1. A built-in file format (see [Input & Output File Formats](../../13-sql-reference/75-file-format-options.md). +1. A built-in file format (see [Input & Output File Formats](../../13-sql-reference/50-file-format-options.md). 2. A named file format created by [CREATE FILE FORMAT](../../14-sql-commands/00-ddl/100-file-format/01-ddl-create-file-format.md). If not specified for named stages, the format of the stage should be used. diff --git a/docs/doc/15-sql-functions/130-geo-functions/_category_.json b/docs/doc/15-sql-functions/130-geo-functions/_category_.json index 619196e8dd15e..d61abd3523d33 100644 --- a/docs/doc/15-sql-functions/130-geo-functions/_category_.json +++ b/docs/doc/15-sql-functions/130-geo-functions/_category_.json @@ -1,7 +1,3 @@ { - "label": "Geography Functions", - "link": { - "type": "generated-index", - "slug": "/reference/functions/geo-functions" - } + "label": "Geography Functions" } \ No newline at end of file diff --git a/docs/doc/15-sql-functions/130-geo-functions/geo_to_h3.md b/docs/doc/15-sql-functions/130-geo-functions/geo_to_h3.md deleted file mode 100644 index 4203351f9f96c..0000000000000 --- a/docs/doc/15-sql-functions/130-geo-functions/geo_to_h3.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: GEO_TO_H3 ---- - -Returns the [H3](https://eng.uber.com/h3/) index of the hexagon cell where the given location resides. - -## Syntax - -```sql -GEO_TO_H3(lon, lat, res) -``` - -## Arguments - -| Argument | Type | Description | -|------------|---------|-----------------------------------------------------------------------------------------------------------| -| lon | Float64 | Specifies the location's longitude, for example, `37.79506683`. | -| lat | Float64 | Specifies the location's latitude, for example, `55.71290588`. | -| res | UInt8 | Sets an [H3 resolution](https://h3geo.org/docs/core-library/restable) ranging from 0 to 15.| - -## Return Type - -UInt64. - -:::note -Returning 0 means an error occurred. 
-::: - -## Examples - -```sql -SELECT GEO_TO_H3(37.79506683, 55.71290588, 15) AS h3Index; -+-------------------------------+ -| h3Index | -+-------------------------------+ -| 644325524701193974 | -+-------------------------------+ -``` \ No newline at end of file diff --git a/docs/doc/15-sql-functions/130-geo-functions/geohash_decode.md b/docs/doc/15-sql-functions/130-geo-functions/geohash_decode.md deleted file mode 100644 index 3e0fbdef254f0..0000000000000 --- a/docs/doc/15-sql-functions/130-geo-functions/geohash_decode.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: GEOHASH_DECODE ---- - -Converts a [Geohash](https://en.wikipedia.org/wiki/Geohash)-encoded string into latitude/longitude coordinates. - -See also: [GEOHASH_ENCODE](geohash_encode.md) - -## Syntax - -```sql -GEOHASH_DECODE('') -``` - -## Return Type - -Returns a latitude(Float64) and longitude(Float64) pair. - -## Examples - -```sql -SELECT GEOHASH_DECODE('ezs42') - ----- -(-5.60302734375,42.60498046875) -``` \ No newline at end of file diff --git a/docs/doc/15-sql-functions/130-geo-functions/geohash_encode.md b/docs/doc/15-sql-functions/130-geo-functions/geohash_encode.md deleted file mode 100644 index d072fe39913d1..0000000000000 --- a/docs/doc/15-sql-functions/130-geo-functions/geohash_encode.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: GEOHASH_ENCODE ---- - -Converts a pair of latitude and longitude coordinates into a [Geohash](https://en.wikipedia.org/wiki/Geohash)-encoded string. - -See also: [GEOHASH_DECODE](geohash_decode.md) - -## Syntax - -```sql -GEOHASH_ENCODE(lon, lat) -``` -## Arguments - -| Argument | Type | Description | -|------------|---------|-----------------------------------------------------------------------------------------------------------| -| lon | Float64 | Specifies the location's longitude, for example, `37.79506683`. | -| lat | Float64 | Specifies the location's latitude, for example, `55.71290588`. | - -## Return Type - -Returns a [Geohash](https://en.wikipedia.org/wiki/Geohash)-encoded string. - -## Examples - -```sql -SELECT GEOHASH_ENCODE(-5.60302734375, 42.593994140625) - ----- -ezs42d000000 -``` \ No newline at end of file diff --git a/docs/doc/15-sql-functions/130-geo-functions/index.md b/docs/doc/15-sql-functions/130-geo-functions/index.md new file mode 100644 index 0000000000000..18978dc77912e --- /dev/null +++ b/docs/doc/15-sql-functions/130-geo-functions/index.md @@ -0,0 +1,17 @@ +--- +title: 'Geography Functions' +--- + +| Function | Description | Example | Result | +|---------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------|---------------------------------| +| **GEO_TO_H3(lon, lat, res)** | Returns the [H3](https://eng.uber.com/h3/) index of the hexagon cell where the given location resides. | **GEO_TO_H3(37.79506683, 55.71290588, 15)** | 644325524701193974 | +| **GEOHASH_DECODE('')** | Converts a [Geohash](https://en.wikipedia.org/wiki/Geohash)-encoded string into latitude/longitude coordinates. | **GEOHASH_DECODE('ezs42')** | (-5.60302734375,42.60498046875) | +| **GEOHASH_ENCODE(lon, lat)** | Converts a pair of latitude and longitude coordinates into a [Geohash](https://en.wikipedia.org/wiki/Geohash)-encoded string. | **GEOHASH_ENCODE(-5.60302734375, 42.593994140625)** | ezs42d000000 | +| **POINT_IN_POLYGON((x,y), [(a,b), (c,d), (e,f) ... 
])** | Calculates whether a given point falls within the polygon formed by joining multiple points. | **POINT_IN_POLYGON((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)])** | 1 | + +:::note + +- `GEO_TO_H3(lon, lat, res)` returning 0 means an error occurred. +- `POINT_IN_POLYGON((x,y), [(a,b), (c,d), (e,f) ... ])` A polygon is a closed shape connected by coordinate pairs in the order they appear. Changing the order of coordinate pairs can result in a different shape. + +::: diff --git a/docs/doc/15-sql-functions/130-geo-functions/point_in_polygon.md b/docs/doc/15-sql-functions/130-geo-functions/point_in_polygon.md deleted file mode 100644 index 35d698bc78b66..0000000000000 --- a/docs/doc/15-sql-functions/130-geo-functions/point_in_polygon.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: POINT_IN_POLYGON ---- - -Calculates whether a given point falls within the polygon formed by joining multiple points. - -## Syntax - -```sql -POINT_IN_POLYGON((x,y), [(a,b), (c,d), (e,f) ... ]) -``` - -## Arguments - -| Argument | Type | Description | -|-------------------------|-------------------|---------------------------------------------------------------------| -| (x,y) | (Float64,Float64) | Coordinates of the given point. | -| (a,b), (c,d), (e,f) ... | VARIANT | An ordered list of coordinate pairs defining the shape of a polygon.| - -:::note -A polygon is a closed shape connected by coordinate pairs in the order they appear. Changing the order of coordinate pairs can result in a different shape. -::: - -## Return Type - -Returns 1 if the given point falls within the formed polygon; otherwise, returns 0. - -## Examples - -```sql -SELECT POINT_IN_POLYGON((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) - ----- -1 -``` \ No newline at end of file diff --git a/docs/doc/15-sql-functions/60-conversion-functions/_category_.json b/docs/doc/15-sql-functions/60-conversion-functions/_category_.json index 1e2dc5a064e4e..d638b6eacfa54 100644 --- a/docs/doc/15-sql-functions/60-conversion-functions/_category_.json +++ b/docs/doc/15-sql-functions/60-conversion-functions/_category_.json @@ -1,7 +1,3 @@ { - "label": "Conversion Functions", - "link": { - "type": "generated-index", - "slug": "/reference/functions/conversion-functions" - } + "label": "Conversion Functions" } \ No newline at end of file diff --git a/docs/doc/15-sql-functions/60-conversion-functions/cast.md b/docs/doc/15-sql-functions/60-conversion-functions/cast.md deleted file mode 100644 index af3aad8a83df4..0000000000000 --- a/docs/doc/15-sql-functions/60-conversion-functions/cast.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: CAST ---- - -Convert a value from one data type to another data type. - -## Syntax - -```sql -CAST( AS ) -:: -``` - -## Arguments - -| Arguments | Description | -| ----------- | ----------- | -| `` | A value to convert. | -| `` | The target data type | - -:::tip - -[Databend Data Types](../../13-sql-reference/10-data-types/index.md) - -::: - -## Return Type - -Converted value. 
- -## Examples - -```sql -SELECT CAST(1 AS VARCHAR); -+-------------------+ -| cast(1 as String) | -+-------------------+ -| 1 | -+-------------------+ - -SELECT 1::VARCHAR; -+-----------+ -| 1::String | -+-----------+ -| 1 | -+-----------+ - -SELECT CAST(1 AS BIGINT UNSIGNED); -+-------------------+ -| cast(1 as UInt64) | -+-------------------+ -| 1 | -+-------------------+ - -SELECT typeof(CAST(1 AS BIGINT UNSIGNED)); -+-------------------------------+ -| typeof(cast(1 as UInt64)) | -+-------------------------------+ -| UInt64 | -+-------------------------------+ -``` diff --git a/docs/doc/15-sql-functions/60-conversion-functions/index.md b/docs/doc/15-sql-functions/60-conversion-functions/index.md new file mode 100644 index 0000000000000..9168435e24849 --- /dev/null +++ b/docs/doc/15-sql-functions/60-conversion-functions/index.md @@ -0,0 +1,35 @@ +--- +title: 'Conversion Functions' +--- + +| Function | Description | Example | Result | +|-----------------------------------|----------------------------------------------------------------------------------------|------------------------------------|----------------------------| +| **CAST( expr AS data_type )** | Convert a value from one data type to another data type | **CAST(1 AS VARCHAR)** | 1 | +| **expr::data_type** | alias for CAST | **1::VARCHAR** | 1 | +| **TRY_CAST( expr AS data_type )** | Convert a value from one data type to another data type. If error happens, return NULL | **TRY_CAST(1 AS VARCHAR)** | 1 | +| **TO_BOOLEAN( expr )** | Convert a value to BOOLEAN data type | **TO_BOOLEAN('true')** | 1 | +| **TO_DATE( expr )** | Convert a value to DATE data type | **TO_DATE(19109)** | 2022-04-27 | +| **TO_DATETIME( expr )** | Convert a value to DATETIME data type | **TO_DATETIME(1651036648)** | 2022-04-27 05:17:28.000000 | +| **TO_TIMESTAMP( expr )** | alias for TO_DATETIME | **TO_TIMESTAMP(1651036648123456)** | 2022-04-27 05:17:28.123456 | +| **TO_FLOAT32( expr )** | Convert a value to FLOAT32 data type | **TO_FLOAT32('1.2')** | 1.2 | +| **TO_FLOAT64( expr )** | Convert a value to FLOAT64 data type | **TO_FLOAT64('1.2')** | 1.2 | +| **TO_INT8( expr )** | Convert a value to INT8 data type | **TO_INT8('123')** | 123 | +| **TO_INT16( expr )** | Convert a value to INT16 data type | **TO_INT16('123')** | 123 | +| **TO_INT32( expr )** | Convert a value to INT32 data type | **TO_INT32('123')** | 123 | +| **TO_INT64( expr )** | Convert a value to INT64 data type | **TO_INT64('123')** | 123 | +| **TO_UINT8( expr )** | Convert a value to UINT8 data type | **TO_UINT8('123')** | 123 | +| **TO_UINT16( expr )** | Convert a value to UINT16 data type | **TO_UINT16('123')** | 123 | +| **TO_UINT32( expr )** | Convert a value to UINT32 data type | **TO_UINT32('123')** | 123 | +| **TO_UINT64( expr )** | Convert a value to UINT64 data type | **TO_UINT64('123')** | 123 | +| **TO_STRING( expr )** | Convert a value to STRING data type | **TO_STRING(10)** | 10 | + + +:::note + +`TO_DATETIME( expr )` and `TO_TIMESTAMP( expr )` uses the following rules to automatically determine the unit of time: + +- If the value is less than 31536000000, it is treated as a number of seconds, +- If the value is greater than or equal to 31536000000 and less than 31536000000000, it is treated as milliseconds. +- If the value is greater than or equal to 31536000000000, it is treated as microseconds. 
+ +::: diff --git a/docs/doc/15-sql-functions/60-conversion-functions/try_cast.md b/docs/doc/15-sql-functions/60-conversion-functions/try_cast.md deleted file mode 100644 index b9858e9a60405..0000000000000 --- a/docs/doc/15-sql-functions/60-conversion-functions/try_cast.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: TRY_CAST ---- - -Convert a value from one data type to another data type. If error happens, return NULL. - -## Syntax - -```sql -TRY_CAST(x AS t) -``` - -## Arguments - -| Arguments | Description | -| ----------- | ----------- | -| x | A value to convert. | -| t | The target data type. | - -## Return Type - -Nullable datatype of the target data type - -## Examples - -```sql -SELECT try_cast(1 AS VARCHAR); -+-----------------------+ -| try_cast(1 as String) | -+-----------------------+ -| 1 | -+-----------------------+ - -SELECT try_cast('abc' AS INT UNSIGNED); -+---------------------------+ -| try_cast('abc' as UInt32) | -+---------------------------+ -| NULL | -+---------------------------+ - -SELECT typeof(try_cast('abc' AS INT UNSIGNED)); -+-----------------------------------+ -| typeof(try_cast('abc' as uint32)) | -+-----------------------------------+ -| INT UNSIGNED NULL | -+-----------------------------------+ -``` diff --git a/docs/doc/15-sql-functions/60-conversion-functions/type_conversion.md b/docs/doc/15-sql-functions/60-conversion-functions/type_conversion.md deleted file mode 100644 index 0e1399cf54555..0000000000000 --- a/docs/doc/15-sql-functions/60-conversion-functions/type_conversion.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: Type Conversion ---- - -Type conversion to target type. - -## Syntax - -```sql -TO_BOOLEAN( ) -TO_DATE( ) -TO_DATETIME( ) -TO_TIMESTAMP( ) -TO_FLOAT32( ) -TO_FLOAT64( ) -TO_INT8( ) -TO_INT16( ) -TO_INT32( ) -TO_INT64( ) -TO_NULL( ) -TO_STRING( ) -TO_UINT8( ) -TO_UINT16( ) -TO_UINT32( ) -TO_UINT64( ) -``` - -> `TO_DATETIME( )` and `TO_TIMESTAMP( )` uses the following rules to automatically determine the unit of time: -> -> - If the value is less than 31536000000, it is treated as a number of seconds, -> - If the value is greater than or equal to 31536000000 and less than 31536000000000, it is treated as milliseconds. -> - If the value is greater than or equal to 31536000000000, it is treated as microseconds. 
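The unit-detection thresholds described in the note above (and in the removed type_conversion.md page) are easier to see with concrete inputs. The values and results below are taken from the examples elsewhere in this patch; the threshold behaviour is as documented, not re-verified here.

```sql
-- Below 31536000000: interpreted as seconds.
SELECT TO_DATETIME(1651036648);        -- 2022-04-27 05:17:28.000000

-- Between 31536000000 and 31536000000000: interpreted as milliseconds.
SELECT TO_DATETIME(1651036648123);     -- 2022-04-27 05:17:28.123000

-- 31536000000000 and above: interpreted as microseconds.
SELECT TO_TIMESTAMP(1651036648123456); -- 2022-04-27 05:17:28.123456
```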
- -## Examples - -```sql -SELECT to_boolean('true'); -+--------------------+ -| to_boolean('true') | -+--------------------+ -| 1 | -+--------------------+ - -SELECT to_date(19109); -+----------------+ -| to_date(19109) | -+----------------+ -| 2022-04-27 | -+----------------+ - -SELECT to_datetime(1651036648); -+----------------------------+ -| to_datetime(1651036648) | -+----------------------------+ -| 2022-04-27 05:17:28.000000 | -+----------------------------+ - -SELECT to_datetime(1651036648123); -+----------------------------+ -| to_datetime(1651036648123) | -+----------------------------+ -| 2022-04-27 05:17:28.123000 | -+----------------------------+ - -SELECT to_timstamp(1651036648123456); -+--------------------------------+ -| to_timstamp(1651036648123456) | -+--------------------------------+ -| 2022-04-27 05:17:28.123456 | -+--------------------------------+ - -SELECT to_float32('1.2'); -+--------------------+ -| to_float32('1.2') | -+--------------------+ -| 1.2000000476837158 | -+--------------------+ - -SELECT to_float64('1.2'); -+-------------------+ -| to_float64('1.2') | -+-------------------+ -| 1.2 | -+-------------------+ - -SELECT to_int8('123'); -+----------------+ -| to_int8('123') | -+----------------+ -| 123 | -+----------------+ - -SELECT to_null(10); -+-------------+ -| to_null(10) | -+-------------+ -| NULL | -+-------------+ - -SELECT to_string(10); -+---------------+ -| to_string(10) | -+---------------+ -| 10 | -+---------------+ -``` diff --git a/docs/doc/15-sql-functions/80-uuid-functions/index.md b/docs/doc/15-sql-functions/80-uuid-functions/index.md index e47c531d203ff..021617706c052 100644 --- a/docs/doc/15-sql-functions/80-uuid-functions/index.md +++ b/docs/doc/15-sql-functions/80-uuid-functions/index.md @@ -4,5 +4,5 @@ title: 'UUID Functions' | Function | Description | Example | Result | |--------------------------------------|-----------------------------------------------------------------|------------------------|--------------------------------------| -| **GEN_RANDOM_UUID** | Generate a random UUID based on v4. | **gen_random_uuid()** | ab1bce12-4508-4d11-bd96-c42e9e7eefdd | +| **GEN_RANDOM_UUID()** | Generate a random UUID based on v4. | **GEN_RANDOM_UUID()** | ab1bce12-4508-4d11-bd96-c42e9e7eefdd | | **UUID()** | Generate a UUID. | **UUID()** | c72fe96b-3662-4f49-a63b-345b17ceebd6 | diff --git a/docs/doc/20-develop/00-golang.md b/docs/doc/20-develop/00-golang.md deleted file mode 100644 index ec8daef420540..0000000000000 --- a/docs/doc/20-develop/00-golang.md +++ /dev/null @@ -1,157 +0,0 @@ ---- -title: How to Work With Databend in Golang -sidebar_label: Golang -description: - How to work with Databend in Golang. ---- - -## Before You Begin - -* **Databend :** Make sure Databend is running and accessible, see [How to deploy Databend](/doc/deploy). -* [How to Create User](../14-sql-commands/00-ddl/30-user/01-user-create-user.md) -* [How to Grant Privileges to User](../14-sql-commands/00-ddl/30-user/10-grant-privileges.md) - -## Create Databend User - -```shell -mysql -h127.0.0.1 -uroot -P3307 -``` - -### Create a User - -```sql -CREATE USER user1 IDENTIFIED BY 'abc123'; -``` - -### Grants Privileges - -Grants `ALL` privileges to the user `user1`: -```sql -GRANT ALL on *.* TO user1; -``` - -## Golang - -This guideline show how to connect and query to Databend using Golang. We will be creating a table named `books` and insert a row, then query it. 
- -### main.go - -```text title='main.go' -package main - -import ( - "database/sql" - "fmt" - "log" - - _ "github.com/go-sql-driver/mysql" -) - -const ( - username = "user1" - password = "abc123" - hostname = "127.0.0.1:3307" -) - -type Book struct { - Title string - Author string - Date string -} - -func dsn() string { - // Note Databend do not support prepared statements. - // set interpolateParams to make placeholders (?) in calls to db.Query() and db.Exec() interpolated into a single query string with given parameters. - // ref https://github.com/go-sql-driver/mysql#interpolateparams - return fmt.Sprintf("%s:%s@tcp(%s)/?interpolateParams=true", username, password, hostname) -} - -func main() { - db, err := sql.Open("mysql", dsn()) - - if err != nil { - log.Fatal(err) - } - defer db.Close() - - err = db.Ping() - if err != nil { - log.Fatal(err) - } - log.Println("Connected") - - // Create db if do not exist - dbSql := "CREATE DATABASE IF NOT EXISTS book_db" - _, err = db.Exec(dbSql) - if err != nil { - log.Fatal(err) - } - log.Println("Create database book_db success") - - // Use book_db database - _, err = db.Exec("USE book_db") - if err != nil { - log.Fatal(err) - } - - // Create table. - sql := "create table if not exists books(title VARCHAR, author VARCHAR, date VARCHAR)" - _, err = db.Exec(sql) - if err != nil { - log.Fatal(err) - } - log.Println("Create table: books") - - // Insert 1 row. - _, err = db.Exec("INSERT INTO books VALUES(?, ?, ?)", "mybook", "author", "2022") - if err != nil { - log.Fatal(err) - } - log.Println("Insert 1 row") - - // Select. - res, err := db.Query("SELECT * FROM books") - if err != nil { - log.Fatal(err) - } - - for res.Next() { - var book Book - err := res.Scan(&book.Title, &book.Author, &book.Date) - if err != nil { - log.Fatal(err) - } - - log.Printf("Select:%v", book) - } - -} -``` - -### Golang mod - -```text -go mod init databend-golang -``` - -```text title='go.mod' -module databend-golang - -go 1.18 - -require github.com/go-sql-driver/mysql v1.6.0 -``` - -### Run main.go - -```shell -go run main.go -``` - -```text title='Outputs' -2022/04/13 12:20:07 Connected -2022/04/13 12:20:07 Create database book_db success -2022/04/13 12:20:07 Create table: books -2022/04/13 12:20:07 Insert 1 row -2022/04/13 12:20:08 Select:{mybook author 2022} -``` diff --git a/docs/doc/20-develop/01-python.md b/docs/doc/20-develop/01-python.md deleted file mode 100644 index 744f9d1f6e8d1..0000000000000 --- a/docs/doc/20-develop/01-python.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: How to Work With Databend in Python -sidebar_label: Python -description: - How to work with Databend in Python. ---- - -## Before You Begin - -* **Databend :** Make sure Databend is running and accessible, see [How to deploy Databend](/doc/deploy). -* [How to Create User](../14-sql-commands/00-ddl/30-user/01-user-create-user.md) -* [How to Grant Privileges to User](../14-sql-commands/00-ddl/30-user/10-grant-privileges.md) - -## Create Databend User - -```shell -mysql -h127.0.0.1 -uroot -P3307 -``` - -### Create a User - -```sql -CREATE USER user1 IDENTIFIED BY 'abc123'; -``` - -### Grants Privileges - -Grants `ALL` privileges to the user `user1`: -```sql -GRANT ALL ON *.* TO user1; -``` - -## Python - -This guideline show how to connect and query to Databend using Python. - -We will be creating a table named `books` and insert a row, then query it. 
- -### Using mysql.connector - -```shell -pip install mysql-connector-python -``` - -```python title='main.py' -#!/usr/bin/env python3 -import mysql.connector - -cnx = mysql.connector.connect(user='user1', password='abc123', - host='127.0.0.1', - port = 3307, - database='') - -# Create database, table. -cursor = cnx.cursor() -cursor.execute("CREATE DATABASE IF NOT EXISTS book_db") -cursor.execute("USE book_db") -cursor.execute("CREATE TABLE IF NOT EXISTS books(title VARCHAR, author VARCHAR, date VARCHAR)") - -# Insert new book. -add_book = ("INSERT INTO books " - "(title, author, date) " - "VALUES (%s, %s, %s)") -data_book = ('mybook', 'author', '2022') -cursor.execute(add_book, data_book) - -# Query. -query = ("SELECT * FROM books") -cursor.execute(query) -for (title, author, date) in cursor: - print("{} {} {}".format(title, author, date)) - -cursor.close() -cnx.close() -``` - -Run `python main.py`: -```text -mybook author 2022 -``` - -### Using sqlalchemy - -```shell -pip install sqlalchemy -``` - -```python title='main.py' -#!/usr/bin/env python3 - -import sqlalchemy - -engine = sqlalchemy.create_engine("mysql+pymysql://user1:abc123@localhost:3307/") -conn = engine.connect() -conn.execute("CREATE DATABASE IF NOT EXISTS book_db") -conn.execute("USE book_db") -conn.execute("CREATE TABLE IF NOT EXISTS books(title VARCHAR, author VARCHAR, date VARCHAR)") -conn.execute("INSERT INTO books VALUES('mybook', 'author', '2022')") -results = conn.execute('SELECT * FROM books').fetchall() -for result in results: - print(result) -conn.execute('drop database book_db') - -``` - -Run `python main.py`: -```text -('mybook', 'author', '2022') -``` diff --git a/docs/doc/20-develop/02-nodejs.md b/docs/doc/20-develop/02-nodejs.md deleted file mode 100644 index 63ccd14fab4b0..0000000000000 --- a/docs/doc/20-develop/02-nodejs.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: How to Work With Databend in Node.js -sidebar_label: Node.js -description: - How to work with Databend in Node.js. ---- - -## Before You Begin - -* **Databend :** Make sure Databend is running and accessible, see [How to deploy Databend](/doc/deploy). -* [How to Create User](../14-sql-commands/00-ddl/30-user/01-user-create-user.md) -* [How to Grant Privileges to User](../14-sql-commands/00-ddl/30-user/10-grant-privileges.md) - -## Create Databend User - -```shell -mysql -h127.0.0.1 -uroot -P3307 -``` - -### Create a User - -```sql -CREATE USER user1 IDENTIFIED BY 'abc123'; -``` - -### Grants Privileges - -Grants `ALL` privileges to the user `user1`: -```sql -GRANT ALL ON *.* TO user1; -``` - -## Node.js - -This guideline show how to connect and query to Databend using Node.js. - -We will be creating a table named `books` and insert a row, then query it. 
- -```text -npm install --save mysql -``` - -```js title='databend.js' -const mysql = require('mysql'); -const con = mysql.createConnection({ - host: 'localhost', - port: 3307, - user: 'user1', - password: 'abc123', -}); - -con.connect((err) => { - if (err) throw err; - console.log('Connected to Databend Server!'); - - var sql = "CREATE DATABASE IF NOT EXISTS book_db"; - con.query(sql, function (err, result) { - if (err) throw err; - console.log("Dataabse created"); - }); - - var sql = "USE book_db"; - con.query(sql, function (err, result) { - if (err) throw err; - }); - - - var sql = "CREATE TABLE IF NOT EXISTS books(title VARCHAR, author VARCHAR, date VARCHAR)"; - con.query(sql, function (err, result) { - if (err) throw err; - console.log("Table created"); - }); - - var sql = "INSERT INTO books VALUES('mybook', 'author', '2022')"; - con.query(sql, function (err, result) { - if (err) throw err; - console.log("1 record inserted"); - }); - - con.query("SELECT * FROM books", function (err, result, fields) { - if (err) throw err; - console.log(result); - }); - -}); -``` - -Run `nodejs databend.js`: - -```text -Connected to Databend Server! -Dataabse created -Table created -1 record inserted -[ RowDataPacket { title: 'mybook', author: 'author', date: '2022' } ] -``` diff --git a/docs/doc/20-develop/03-java.md b/docs/doc/20-develop/03-java.md deleted file mode 100644 index 17178bf271f1b..0000000000000 --- a/docs/doc/20-develop/03-java.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -title: How to Work With Databend in Java -sidebar_label: Java -description: - How to work with Databend in Java. ---- - -## Before You Begin - -* **Databend :** Make sure Databend is running and accessible, see [How to deploy Databend](/doc/deploy). -* [How to Create User](../14-sql-commands/00-ddl/30-user/01-user-create-user.md) -* [How to Grant Privileges to User](../14-sql-commands/00-ddl/30-user/10-grant-privileges.md) - -## Create Databend User - -```shell -mysql -h127.0.0.1 -uroot -P3307 -``` - -### Create a User - -```sql -CREATE USER user1 IDENTIFIED BY 'abc123'; -``` - -### Grants Privileges - -Grants `ALL` privileges to the user `user1`: -```sql -GRANT ALL on *.* TO user1; -``` - -## Java - -This topic shows how to connect and query Databend using JDBC. We will create a table named books, insert a row, and then query data from the table. - -### demo.java - -```java title='demo.java' -import java.sql.*; - -public class demo { - static final String JDBC_DRIVER = "com.mysql.jdbc.Driver"; - static final String DB_URL = "jdbc:mysql://127.0.0.1:3307/default"; - - static final String USER = "user1"; - static final String PASS = "abc123"; - - public static void main(String[] args) { - Connection conn = null; - Statement stmt = null; - try{ - Class.forName(JDBC_DRIVER); - conn = DriverManager.getConnection(DB_URL,USER,PASS); - - stmt = conn.createStatement(); - String create_sql = "CREATE DATABASE IF NOT EXISTS book_db"; - int rs1 = stmt.executeUpdate(create_sql); - - String use_sql = "USE book_db"; - int rs2 = stmt.executeUpdate(use_sql); - - String ct_sql = "CREATE TABLE IF NOT EXISTS books(title VARCHAR, author VARCHAR, date VARCHAR)"; - int rs3 = stmt.executeUpdate(ct_sql); - - - // Insert new book. 
- String title = "mybook"; - String author = "author"; - String date = "2022"; - String add_book = "INSERT INTO books (title, author, date) VALUES ('"+ title +"', '"+ author +"', '" + date + "')"; - int rs4 = stmt.executeUpdate(add_book); - - - // Select book - String sql = "SELECT * FROM books"; - ResultSet rs = stmt.executeQuery(sql); - - while (rs.next()) { - String col1 = rs.getString("title"); - String col2 = rs.getString("author"); - String col3 = rs.getString("date"); - - System.out.print("title: " + col1 + ", author: " + col2 + ", date: " + col3); - } - // Close conn - rs.close(); - stmt.close(); - conn.close(); - } catch(SQLException se) { - // throw JDBC err - se.printStackTrace(); - } catch(Exception e) { - // throw Class.forName err - e.printStackTrace(); - } finally { - // Close source - try{ - if(stmt!=null) stmt.close(); - } catch(SQLException se2) { - } - try{ - if (conn!=null) conn.close(); - } catch(SQLException se) { - se.printStackTrace(); - } - } - } -} -``` - -### Run demo.java - -In this case: - -The demo classpath is located at /home/eason/database/source-code/test/out/test/test - -The demo classpath is located at /home/eason/Downloads/jar_files/mysql-connector-java-5.1.48.jar - -```shell -$ ~/.jdks/openjdk-17.0.1/bin/java -Dfile.encoding=UTF-8 -classpath /home/eason/database/source-code/test/out/test/test:/home/eason/Downloads/jar_files/mysql-connector-java-5.1.48.jar demo -title: mybook, author: author, date: 2022 -``` - -```text title='Outputs' -title: mybook, author: author, date: 2022 -``` diff --git a/docs/doc/90-contributing/01-rfcs/20230213-query-result-cache.md b/docs/doc/90-contributing/01-rfcs/20230213-query-result-cache.md index 1285936578819..0a9c24fe5b4b4 100644 --- a/docs/doc/90-contributing/01-rfcs/20230213-query-result-cache.md +++ b/docs/doc/90-contributing/01-rfcs/20230213-query-result-cache.md @@ -139,6 +139,26 @@ The table contains such information: - `result_size`: the size of the result cache (bytes). - `location`: the location of the result cache file. +### Table function `RESULT_SCAN` + +`RESULT_SCAN` is a useful table function to retrieve the result set of a previous query. + +It can be used like: + +```sql +select * from RESULT_SCAN(''); +select * from RESULT_SCAN(LAST_QUERY_ID()); +``` + +If the previous query result is cached, we can get the result set quickly from query result cache. + ### Non-deterministic functions Some functions are non-deterministic, such as `now()`, `rand()`, `uuid()`, etc. If these functions are used in the query, the result will not be cached. 
+ +## References + +- [Query Cache in ClickHouse](https://clickhouse.com/docs/en/operations/query-cache/) +- [Blog about the query cache in ClickHouse](https://clickhouse.com/blog/introduction-to-the-clickhouse-query-cache-and-design) +- [RESULT_SCAN in snowflake](https://docs.snowflake.com/en/sql-reference/functions/result_scan) +- [Tuning the Result Cache in Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/tgdba/tuning-result-cache.html) \ No newline at end of file diff --git a/scripts/ci/deploy/config/databend-query-node-1.toml b/scripts/ci/deploy/config/databend-query-node-1.toml index c404bf476004a..ab03f216ff1c7 100644 --- a/scripts/ci/deploy/config/databend-query-node-1.toml +++ b/scripts/ci/deploy/config/databend-query-node-1.toml @@ -54,7 +54,7 @@ default_compression = 'zstd' [log] [log.file] -level = "WARN" +level = "INFO" format = "text" dir = "./.databend/logs_1" diff --git a/scripts/ci/deploy/config/databend-query-node-2.toml b/scripts/ci/deploy/config/databend-query-node-2.toml index 3436aa97d972b..46460b68ee689 100644 --- a/scripts/ci/deploy/config/databend-query-node-2.toml +++ b/scripts/ci/deploy/config/databend-query-node-2.toml @@ -37,7 +37,7 @@ default_compression = 'zstd' [log] [log.file] -level = "ERROR" +level = "INFO" format = "text" dir = "./.databend/logs_2" diff --git a/scripts/ci/deploy/config/databend-query-node-3.toml b/scripts/ci/deploy/config/databend-query-node-3.toml index 80de1fc49994d..1d27ac466c042 100644 --- a/scripts/ci/deploy/config/databend-query-node-3.toml +++ b/scripts/ci/deploy/config/databend-query-node-3.toml @@ -38,7 +38,7 @@ default_compression = 'zstd' [log] [log.file] -level = "ERROR" +level = "INFO" format = "text" dir = "./.databend/logs_3" diff --git a/src/common/hashtable/Cargo.toml b/src/common/hashtable/Cargo.toml index 1a8b9383094a2..7d90d0ccf148a 100644 --- a/src/common/hashtable/Cargo.toml +++ b/src/common/hashtable/Cargo.toml @@ -20,8 +20,8 @@ common-base = { path = "../base" } ahash = { version = "0.8.2", features = ["no-rng"] } bumpalo = "3.10.0" cfg-if = "1.0.0" +ethnum = { version = "1.3" } ordered-float = { workspace = true, features = ["serde"] } -primitive-types = "0.12.0" [dev-dependencies] rand = "0.8.5" diff --git a/src/common/hashtable/src/partitioned_hashtable.rs b/src/common/hashtable/src/partitioned_hashtable.rs index d1541670d2dfc..7b6103d472a6c 100644 --- a/src/common/hashtable/src/partitioned_hashtable.rs +++ b/src/common/hashtable/src/partitioned_hashtable.rs @@ -16,6 +16,7 @@ use std::collections::VecDeque; use std::iter::TrustedLen; use std::mem::MaybeUninit; use std::slice::IterMut; +use std::vec::IntoIter; use crate::FastHash; use crate::HashSet; @@ -42,6 +43,18 @@ impl pub fn iter_tables_mut(&mut self) -> IterMut<'_, Impl> { self.tables.iter_mut() } + + pub fn into_iter_tables(self) -> IntoIter { + self.tables.into_iter() + } + + // #Unsafe the caller must ensure that the hashtable is not used after take_inner_tables + pub unsafe fn pop_first_inner_table(&mut self) -> Option { + match self.tables.is_empty() { + true => None, + false => Some(self.tables.remove(0)), + } + } } /// crc32c hash will return a 32-bit hash value even it's type is u64. diff --git a/src/common/hashtable/src/traits.rs b/src/common/hashtable/src/traits.rs index 701c1b5077995..00acd1471d7e5 100644 --- a/src/common/hashtable/src/traits.rs +++ b/src/common/hashtable/src/traits.rs @@ -15,13 +15,13 @@ // To avoid RUSTFLAGS="-C target-feature=+sse4.2" warning. 
#![allow(unused_imports)] use std::hash::BuildHasher; +use std::hash::Hasher; use std::iter::TrustedLen; use std::mem::MaybeUninit; use std::num::NonZeroU64; +use ethnum::U256; use ordered_float::OrderedFloat; -use primitive_types::U256; -use primitive_types::U512; /// # Safety /// @@ -82,29 +82,12 @@ impl_key_for_primitive_types!(i128); unsafe impl Keyable for U256 { #[inline(always)] fn equals_zero(this: &Self) -> bool { - U256::is_zero(this) + *this == U256::ZERO } #[inline(always)] fn is_zero(this: &MaybeUninit) -> bool { - U256::is_zero(unsafe { this.assume_init_ref() }) - } - - #[inline(always)] - fn hash(&self) -> u64 { - self.fast_hash() - } -} - -unsafe impl Keyable for U512 { - #[inline(always)] - fn is_zero(this: &MaybeUninit) -> bool { - U512::is_zero(unsafe { this.assume_init_ref() }) - } - - #[inline(always)] - fn equals_zero(this: &Self) -> bool { - U512::is_zero(this) + *unsafe { this.assume_init_ref() } == U256::ZERO } #[inline(always)] @@ -257,7 +240,8 @@ impl FastHash for U256 { use std::arch::x86_64::_mm_crc32_u64; let mut value = u64::MAX; for x in self.0 { - value = unsafe { _mm_crc32_u64(value, x) }; + value = unsafe { _mm_crc32_u64(value, x as u64) }; + value = unsafe { _mm_crc32_u64(value, (x >> 64) as u64) }; } value } else { @@ -265,31 +249,7 @@ impl FastHash for U256 { let state = ahash::RandomState::with_seeds(SEEDS[0], SEEDS[1], SEEDS[2], SEEDS[3]); let mut hasher = state.build_hasher(); for x in self.0 { - hasher.write_u64(x); - } - hasher.finish() - } - } - } -} - -impl FastHash for U512 { - #[inline(always)] - fn fast_hash(&self) -> u64 { - cfg_if::cfg_if! { - if #[cfg(target_feature = "sse4.2")] { - use std::arch::x86_64::_mm_crc32_u64; - let mut value = u64::MAX; - for x in self.0 { - value = unsafe { _mm_crc32_u64(value, x) }; - } - value - } else { - use std::hash::Hasher; - let state = ahash::RandomState::with_seeds(SEEDS[0], SEEDS[1], SEEDS[2], SEEDS[3]); - let mut hasher = state.build_hasher(); - for x in self.0 { - hasher.write_u64(x); + hasher.write_u128(x); } hasher.finish() } diff --git a/src/common/io/src/file_split.rs b/src/common/io/src/file_split.rs deleted file mode 100644 index 5f1154a728a42..0000000000000 --- a/src/common/io/src/file_split.rs +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::borrow::Cow; - -#[derive(Debug)] -pub struct FileSplitCow<'a> { - pub path: Option, - pub start_offset: usize, - pub start_row: usize, - pub buf: Cow<'a, [u8]>, -} - -#[derive(Debug)] -pub struct FileSplit { - pub path: Option, - pub start_offset: usize, - pub start_row: usize, - pub buf: Vec, -} - -impl FileSplit { - pub fn to_cow(self) -> FileSplitCow<'static> { - FileSplitCow { - path: self.path, - start_offset: self.start_offset, - start_row: self.start_row, - buf: Cow::from(self.buf), - } - } - - pub fn from_cow(data: FileSplitCow<'_>) -> Self { - Self { - path: data.path, - start_offset: data.start_offset, - start_row: data.start_row, - buf: data.buf.into_owned(), - } - } -} diff --git a/src/common/io/src/lib.rs b/src/common/io/src/lib.rs index de3a7b8d2d096..718fc5f9d74ca 100644 --- a/src/common/io/src/lib.rs +++ b/src/common/io/src/lib.rs @@ -32,7 +32,6 @@ mod binary_write; pub mod cursor_ext; mod escape; -mod file_split; mod format_settings; mod position; mod serialization; diff --git a/src/common/io/src/prelude.rs b/src/common/io/src/prelude.rs index c7bee434055f9..e67c8b6522ebf 100644 --- a/src/common/io/src/prelude.rs +++ b/src/common/io/src/prelude.rs @@ -18,7 +18,6 @@ pub use bytes::BytesMut; pub use crate::binary_read::BinaryRead; pub use crate::binary_write::put_uvarint; pub use crate::binary_write::BinaryWrite; -pub use crate::file_split::*; pub use crate::format_settings::FormatSettings; pub use crate::position::*; pub use crate::serialization::*; diff --git a/src/common/storage/src/operator.rs b/src/common/storage/src/operator.rs index cec42d94bbe4c..2ecb20987d61b 100644 --- a/src/common/storage/src/operator.rs +++ b/src/common/storage/src/operator.rs @@ -36,6 +36,7 @@ use common_meta_app::storage::StorageOssConfig; use common_meta_app::storage::StorageParams; use common_meta_app::storage::StorageRedisConfig; use common_meta_app::storage::StorageS3Config; +use common_meta_app::storage::StorageWebhdfsConfig; use opendal::layers::ImmutableIndexLayer; use opendal::layers::LoggingLayer; use opendal::layers::MetricsLayer; @@ -66,6 +67,7 @@ pub fn init_operator(cfg: &StorageParams) -> Result { StorageParams::S3(cfg) => build_operator(init_s3_operator(cfg)?), StorageParams::Oss(cfg) => build_operator(init_oss_operator(cfg)?), StorageParams::Redis(cfg) => build_operator(init_redis_operator(cfg)?), + StorageParams::Webhdfs(cfg) => build_operator(init_webhdfs_operator(cfg)?), v => { return Err(Error::new( ErrorKind::InvalidInput, @@ -294,6 +296,17 @@ fn init_redis_operator(v: &StorageRedisConfig) -> Result { Ok(builder.build()?) } +/// init_webhdfs_operator will init a WebHDFS operator +fn init_webhdfs_operator(v: &StorageWebhdfsConfig) -> Result { + let mut builder = services::Webhdfs::default(); + + builder.endpoint(&v.endpoint_url); + builder.root(&v.root); + builder.delegation(&v.delegation); + + Ok(builder.build()?) +} + /// DataOperator is the operator to access persist data services. /// /// # Notes diff --git a/src/meta/app/src/storage/storage_params.rs b/src/meta/app/src/storage/storage_params.rs index 176ddbef82998..737cdb0e569c9 100644 --- a/src/meta/app/src/storage/storage_params.rs +++ b/src/meta/app/src/storage/storage_params.rs @@ -37,6 +37,7 @@ pub enum StorageParams { Oss(StorageOssConfig), S3(StorageS3Config), Redis(StorageRedisConfig), + Webhdfs(StorageWebhdfsConfig), /// None means this storage type is none. 
/// @@ -70,6 +71,7 @@ impl StorageParams { StorageParams::S3(v) => v.endpoint_url.starts_with("https://"), StorageParams::Gcs(v) => v.endpoint_url.starts_with("https://"), StorageParams::Redis(_) => false, + StorageParams::Webhdfs(v) => v.endpoint_url.starts_with("https://"), StorageParams::None => false, } } @@ -91,6 +93,7 @@ impl StorageParams { StorageParams::S3(v) => v.root = f(&v.root), StorageParams::Gcs(v) => v.root = f(&v.root), StorageParams::Redis(v) => v.root = f(&v.root), + StorageParams::Webhdfs(v) => v.root = f(&v.root), StorageParams::None => {} }; @@ -156,6 +159,9 @@ impl Display for StorageParams { v.db, v.root, v.endpoint_url ) } + StorageParams::Webhdfs(v) => { + write!(f, "webhdfs | root={},endpoint={}", v.root, v.endpoint_url) + } StorageParams::None => { write!(f, "none",) } @@ -472,6 +478,26 @@ impl Debug for StorageRedisConfig { } } +/// config for WebHDFS Storage Service +#[derive(Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct StorageWebhdfsConfig { + pub endpoint_url: String, + pub root: String, + pub delegation: String, +} + +impl Debug for StorageWebhdfsConfig { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut ds = f.debug_struct("StorageWebhdfsConfig"); + + ds.field("endpoint_url", &self.endpoint_url) + .field("root", &self.root); + + ds.field("delegation", &mask_string(&self.delegation, 3)); + + ds.finish() + } +} /// Mask a string by "******", but keep `unmask_len` of suffix. /// /// Copied from `common-base` so that we don't need to depend on it. diff --git a/src/meta/proto-conv/src/config_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/config_from_to_protobuf_impl.rs index bcee566f25c13..3b0316f27af9d 100644 --- a/src/meta/proto-conv/src/config_from_to_protobuf_impl.rs +++ b/src/meta/proto-conv/src/config_from_to_protobuf_impl.rs @@ -16,6 +16,7 @@ use common_meta_app::storage::StorageFsConfig; use common_meta_app::storage::StorageGcsConfig; use common_meta_app::storage::StorageOssConfig; use common_meta_app::storage::StorageS3Config; +use common_meta_app::storage::StorageWebhdfsConfig; use common_protos::pb; use crate::reader_check_msg; @@ -157,3 +158,34 @@ impl FromToProto for StorageOssConfig { }) } } + +impl FromToProto for StorageWebhdfsConfig { + type PB = pb::WebhdfsStorageConfig; + fn get_pb_ver(p: &Self::PB) -> u64 { + p.version + } + + fn from_pb(p: Self::PB) -> Result + where Self: Sized { + reader_check_msg(p.version, p.min_reader_ver)?; + + Ok(StorageWebhdfsConfig { + endpoint_url: p.endpoint_url, + root: p.root, + delegation: p.delegation, + }) + } + + fn to_pb(&self) -> Result { + Ok(pb::WebhdfsStorageConfig { + version: VER, + min_reader_ver: MIN_READER_VER, + endpoint_url: self.endpoint_url.clone(), + root: self.root.clone(), + delegation: self.delegation.clone(), + + username: String::new(), // reserved for future use + password: String::new(), // reserved for future use + }) + } +} diff --git a/src/meta/proto-conv/src/user_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/user_from_to_protobuf_impl.rs index 61098a8c91a20..f58b8d59e6973 100644 --- a/src/meta/proto-conv/src/user_from_to_protobuf_impl.rs +++ b/src/meta/proto-conv/src/user_from_to_protobuf_impl.rs @@ -537,6 +537,9 @@ impl FromToProto for mt::storage::StorageParams { Some(pb::user_stage_info::stage_storage::Storage::Oss(s)) => Ok( mt::storage::StorageParams::Oss(mt::storage::StorageOssConfig::from_pb(s)?), ), + Some(pb::user_stage_info::stage_storage::Storage::Webhdfs(s)) => Ok( + 
mt::storage::StorageParams::Webhdfs(mt::storage::StorageWebhdfsConfig::from_pb(s)?), + ), None => Err(Incompatible { reason: "StageStorage.storage cannot be None".to_string(), }), @@ -557,6 +560,11 @@ impl FromToProto for mt::storage::StorageParams { mt::storage::StorageParams::Oss(v) => Ok(pb::user_stage_info::StageStorage { storage: Some(pb::user_stage_info::stage_storage::Storage::Oss(v.to_pb()?)), }), + mt::storage::StorageParams::Webhdfs(v) => Ok(pb::user_stage_info::StageStorage { + storage: Some(pb::user_stage_info::stage_storage::Storage::Webhdfs( + v.to_pb()?, + )), + }), others => Err(Incompatible { reason: format!("stage type: {} not supported", others), }), diff --git a/src/meta/proto-conv/src/util.rs b/src/meta/proto-conv/src/util.rs index 33ecd3583dbbf..c86127deec6f4 100644 --- a/src/meta/proto-conv/src/util.rs +++ b/src/meta/proto-conv/src/util.rs @@ -91,6 +91,10 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[ 29, "2023-02-23: Add: metadata.proto/DataType EmptyMap types", ), + ( + 30, + "2023-02-21: Add: config.proto/WebhdfsStorageConfig; Modify: user.proto/UserStageInfo::StageStorage", + ), // Dear developer: // If you're gonna add a new metadata version, you'll have to add a test for it. // You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`) diff --git a/src/meta/proto-conv/tests/it/main.rs b/src/meta/proto-conv/tests/it/main.rs index dd8b83f86d190..e79c18df99f92 100644 --- a/src/meta/proto-conv/tests/it/main.rs +++ b/src/meta/proto-conv/tests/it/main.rs @@ -36,3 +36,4 @@ mod v026_schema; mod v027_schema; mod v028_schema; mod v029_schema; +mod v030_user_stage; diff --git a/src/meta/proto-conv/tests/it/user_proto_conv.rs b/src/meta/proto-conv/tests/it/user_proto_conv.rs index b5a10e10ffaf4..254c49b05a2f4 100644 --- a/src/meta/proto-conv/tests/it/user_proto_conv.rs +++ b/src/meta/proto-conv/tests/it/user_proto_conv.rs @@ -27,6 +27,7 @@ use common_meta_app::storage::StorageGcsConfig; use common_meta_app::storage::StorageOssConfig; use common_meta_app::storage::StorageParams; use common_meta_app::storage::StorageS3Config; +use common_meta_app::storage::StorageWebhdfsConfig; use common_proto_conv::FromToProto; use common_proto_conv::Incompatible; use common_proto_conv::VER; @@ -308,6 +309,43 @@ pub(crate) fn test_oss_stage_info() -> mt::principal::UserStageInfo { } } +// version 29 added WebHDFS as a stage backend, should be tested +pub(crate) fn test_webhdfs_stage_info() -> mt::principal::UserStageInfo { + mt::principal::UserStageInfo { + stage_name: "webhdfs://path/to/stage/files".to_string(), + stage_type: mt::principal::StageType::External, + stage_params: mt::principal::StageParams { + storage: StorageParams::Webhdfs(StorageWebhdfsConfig { + endpoint_url: "https://webhdfs.example.com".to_string(), + root: "/path/to/stage/files".to_string(), + delegation: "".to_string(), + }), + }, + file_format_options: mt::principal::FileFormatOptions { + format: mt::principal::StageFileFormatType::Json, + skip_header: 1024, + field_delimiter: "|".to_string(), + record_delimiter: "//".to_string(), + nan_display: "NaN".to_string(), + escape: "".to_string(), + compression: mt::principal::StageFileCompression::Bz2, + row_tag: "row".to_string(), + quote: "".to_string(), + name: None, + }, + copy_options: mt::principal::CopyOptions { + on_error: mt::principal::OnErrorMode::SkipFileNum(3141), + size_limit: 1038, + split_size: 0, + purge: true, + single: false, + max_file_size: 0, + }, + comment: "test".to_string(), + ..Default::default() + } +} + pub(crate) fn 
test_stage_file() -> mt::principal::StageFile { let dt = NaiveDateTime::new( NaiveDate::from_ymd(2022, 9, 16), @@ -479,6 +517,25 @@ fn test_user_incompatible() -> anyhow::Result<()> { ) } + { + let webhdfs_stage_info = test_webhdfs_stage_info(); + let mut p = webhdfs_stage_info.to_pb()?; + p.ver = VER + 1; + p.min_reader_ver = VER + 1; + + let res = mt::principal::UserStageInfo::from_pb(p); + assert_eq!( + Incompatible { + reason: format!( + "executable ver={} is smaller than the min reader version({}) that can read this message", + VER, + VER + 1 + ) + }, + res.unwrap_err() + ) + } + Ok(()) } diff --git a/src/meta/proto-conv/tests/it/user_stage.rs b/src/meta/proto-conv/tests/it/user_stage.rs index 51fee87d76f04..38cb91c97b387 100644 --- a/src/meta/proto-conv/tests/it/user_stage.rs +++ b/src/meta/proto-conv/tests/it/user_stage.rs @@ -23,6 +23,7 @@ use crate::user_proto_conv::test_internal_stage_info_v17; use crate::user_proto_conv::test_oss_stage_info; use crate::user_proto_conv::test_s3_stage_info; use crate::user_proto_conv::test_user_stage_info_v18; +use crate::user_proto_conv::test_webhdfs_stage_info; #[test] fn test_user_stage_fs_latest() -> anyhow::Result<()> { @@ -48,6 +49,67 @@ fn test_user_stage_oss_latest() -> anyhow::Result<()> { Ok(()) } +#[test] +fn test_user_stage_webhdfs_latest() -> anyhow::Result<()> { + common::test_pb_from_to("user_stage_webhdfs", test_webhdfs_stage_info())?; + Ok(()) +} + +#[test] +fn test_user_stage_webhdfs_v30() -> anyhow::Result<()> { + // Encoded data of version 30 of common_meta_app::principal::user_stage::UserStageInfo: + // It is generated with common::test_pb_from_to(). + let user_stage_info_v30 = vec![ + 10, 29, 119, 101, 98, 104, 100, 102, 115, 58, 47, 47, 112, 97, 116, 104, 47, 116, 111, 47, + 115, 116, 97, 103, 101, 47, 102, 105, 108, 101, 115, 16, 1, 26, 81, 10, 79, 42, 77, 10, 27, + 104, 116, 116, 112, 115, 58, 47, 47, 119, 101, 98, 104, 100, 102, 115, 46, 101, 120, 97, + 109, 112, 108, 101, 46, 99, 111, 109, 18, 20, 47, 112, 97, 116, 104, 47, 116, 111, 47, 115, + 116, 97, 103, 101, 47, 102, 105, 108, 101, 115, 26, 18, 60, 100, 101, 108, 101, 103, 97, + 116, 105, 111, 110, 95, 116, 111, 107, 101, 110, 62, 160, 6, 30, 168, 6, 24, 34, 30, 8, 1, + 16, 128, 8, 26, 1, 124, 34, 2, 47, 47, 40, 2, 58, 3, 114, 111, 119, 66, 3, 78, 97, 78, 160, + 6, 30, 168, 6, 24, 42, 10, 10, 3, 32, 197, 24, 16, 142, 8, 24, 1, 50, 4, 116, 101, 115, + 116, 160, 6, 30, 168, 6, 24, + ]; + + let want = || mt::principal::UserStageInfo { + stage_name: "webhdfs://path/to/stage/files".to_string(), + stage_type: mt::principal::StageType::External, + stage_params: mt::principal::StageParams { + storage: mt::storage::StorageParams::Webhdfs(mt::storage::StorageWebhdfsConfig { + endpoint_url: "https://webhdfs.example.com".to_string(), + root: "/path/to/stage/files".to_string(), + delegation: "".to_string(), + }), + }, + file_format_options: mt::principal::FileFormatOptions { + format: mt::principal::StageFileFormatType::Json, + skip_header: 1024, + field_delimiter: "|".to_string(), + record_delimiter: "//".to_string(), + nan_display: "NaN".to_string(), + escape: "".to_string(), + compression: mt::principal::StageFileCompression::Bz2, + row_tag: "row".to_string(), + quote: "".to_string(), + name: None, + }, + copy_options: mt::principal::CopyOptions { + on_error: mt::principal::OnErrorMode::SkipFileNum(3141), + size_limit: 1038, + split_size: 0, + purge: true, + single: false, + max_file_size: 0, + }, + comment: "test".to_string(), + ..Default::default() + }; + 
common::test_load_old(func_name!(), user_stage_info_v30.as_slice(), 30, want())?; + common::test_pb_from_to(func_name!(), want())?; + + Ok(()) +} + #[test] fn test_user_stage_fs_v22() -> anyhow::Result<()> { // Encoded data of version 21 of user_stage_fs: diff --git a/src/meta/proto-conv/tests/it/v030_user_stage.rs b/src/meta/proto-conv/tests/it/v030_user_stage.rs new file mode 100644 index 0000000000000..8d16804cbc1d6 --- /dev/null +++ b/src/meta/proto-conv/tests/it/v030_user_stage.rs @@ -0,0 +1,84 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_meta_app as mt; +use common_meta_app::storage::StorageParams; +use common_meta_app::storage::StorageWebhdfsConfig; + +use crate::common; + +// These bytes are built when a new version in introduced, +// and are kept for backward compatibility test. +// +// ************************************************************* +// * These messages should never be updated, * +// * only be added when a new version is added, * +// * or be removed when an old version is no longer supported. * +// ************************************************************* +// +// The message bytes are built from the output of `test_user_stage_webhdfs_latest()` +#[test] +fn test_decode_v30_user_stage() -> anyhow::Result<()> { + // Encoded data of version 30 of common_meta_app::principal::user_stage::UserStageInfo: + // It is generated with common::test_pb_from_to(). 
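// Context for the fixture below (a summary of the conversion scheme, not part of
// the generated bytes): `to_pb()` stamps every message with `version: VER` and
// `min_reader_ver: MIN_READER_VER`, and `from_pb()` calls
// `reader_check_msg(version, min_reader_ver)`, which rejects a message whose
// `min_reader_ver` is larger than the reader's own VER; that is the error string
// asserted in `test_user_incompatible` above. Keeping these literal v30 bytes
// therefore pins the wire format for `StorageWebhdfsConfig`: if a later change
// re-encodes the message differently, decoding this vector fails.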
+ let user_stage_info_v30 = vec![ + 10, 29, 119, 101, 98, 104, 100, 102, 115, 58, 47, 47, 112, 97, 116, 104, 47, 116, 111, 47, + 115, 116, 97, 103, 101, 47, 102, 105, 108, 101, 115, 16, 1, 26, 81, 10, 79, 42, 77, 10, 27, + 104, 116, 116, 112, 115, 58, 47, 47, 119, 101, 98, 104, 100, 102, 115, 46, 101, 120, 97, + 109, 112, 108, 101, 46, 99, 111, 109, 18, 20, 47, 112, 97, 116, 104, 47, 116, 111, 47, 115, + 116, 97, 103, 101, 47, 102, 105, 108, 101, 115, 26, 18, 60, 100, 101, 108, 101, 103, 97, + 116, 105, 111, 110, 95, 116, 111, 107, 101, 110, 62, 160, 6, 30, 168, 6, 24, 34, 30, 8, 1, + 16, 128, 8, 26, 1, 124, 34, 2, 47, 47, 40, 2, 58, 3, 114, 111, 119, 66, 3, 78, 97, 78, 160, + 6, 30, 168, 6, 24, 42, 10, 10, 3, 32, 197, 24, 16, 142, 8, 24, 1, 50, 4, 116, 101, 115, + 116, 160, 6, 30, 168, 6, 24, + ]; + + let want = || mt::principal::UserStageInfo { + stage_name: "webhdfs://path/to/stage/files".to_string(), + stage_type: mt::principal::StageType::External, + stage_params: mt::principal::StageParams { + storage: StorageParams::Webhdfs(StorageWebhdfsConfig { + endpoint_url: "https://webhdfs.example.com".to_string(), + root: "/path/to/stage/files".to_string(), + delegation: "".to_string(), + }), + }, + file_format_options: mt::principal::FileFormatOptions { + format: mt::principal::StageFileFormatType::Json, + skip_header: 1024, + field_delimiter: "|".to_string(), + record_delimiter: "//".to_string(), + nan_display: "NaN".to_string(), + escape: "".to_string(), + compression: mt::principal::StageFileCompression::Bz2, + row_tag: "row".to_string(), + quote: "".to_string(), + name: None, + }, + copy_options: mt::principal::CopyOptions { + on_error: mt::principal::OnErrorMode::SkipFileNum(3141), + size_limit: 1038, + split_size: 0, + purge: true, + single: false, + max_file_size: 0, + }, + comment: "test".to_string(), + ..Default::default() + }; + common::test_load_old(func_name!(), user_stage_info_v30.as_slice(), 30, want())?; + common::test_pb_from_to(func_name!(), want())?; + + Ok(()) +} diff --git a/src/meta/protos/proto/config.proto b/src/meta/protos/proto/config.proto index 76e4184aee602..b60de63678183 100644 --- a/src/meta/protos/proto/config.proto +++ b/src/meta/protos/proto/config.proto @@ -61,3 +61,15 @@ message OssStorageConfig { string access_key_id = 4; string access_key_secret = 5; } + +message WebhdfsStorageConfig { + uint64 version = 100; + uint64 min_reader_ver = 101; + + string endpoint_url = 1; + string root = 2; + string delegation = 3; + + string username = 4; // reserved for future use + string password = 5; // reserved for future use +} diff --git a/src/meta/protos/proto/user.proto b/src/meta/protos/proto/user.proto index 2aea78729d73b..07478430f12a5 100644 --- a/src/meta/protos/proto/user.proto +++ b/src/meta/protos/proto/user.proto @@ -136,6 +136,7 @@ message UserStageInfo { FsStorageConfig fs = 2; GcsStorageConfig gcs = 3; OssStorageConfig oss = 4; + WebhdfsStorageConfig webhdfs = 5; } } diff --git a/src/meta/raft-store/Cargo.toml b/src/meta/raft-store/Cargo.toml index 4f4833d6112ba..45222dd50c7cc 100644 --- a/src/meta/raft-store/Cargo.toml +++ b/src/meta/raft-store/Cargo.toml @@ -37,7 +37,6 @@ maplit = "1.0.2" num = "0.4.0" once_cell = "1.15.0" serde = { workspace = true } -serde_json = { workspace = true } tracing = "0.1.36" [dev-dependencies] diff --git a/src/meta/raft-store/src/lib.rs b/src/meta/raft-store/src/lib.rs index 125a6e4354f07..12301f35f1e49 100644 --- a/src/meta/raft-store/src/lib.rs +++ b/src/meta/raft-store/src/lib.rs @@ -19,5 +19,3 @@ pub mod key_spaces; 
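A round-trip sketch for the new proto message (not part of the patch): the Rust config converts to `pb::WebhdfsStorageConfig` and back through `FromToProto`, with `username`/`password` left empty because the .proto reserves them for future use; the `Incompatible` error type is assumed from the surrounding tests.

use common_meta_app::storage::StorageWebhdfsConfig;
use common_proto_conv::FromToProto;
use common_proto_conv::Incompatible;

fn webhdfs_pb_roundtrip() -> Result<(), Incompatible> {
    let cfg = StorageWebhdfsConfig {
        endpoint_url: "https://webhdfs.example.com".to_string(),
        root: "/path/to/stage/files".to_string(),
        delegation: String::new(),
    };
    let p = cfg.to_pb()?;                         // stamps version/min_reader_ver
    assert_eq!(p.endpoint_url, cfg.endpoint_url); // field 1 in config.proto
    let back = StorageWebhdfsConfig::from_pb(p)?; // runs reader_check_msg
    assert_eq!(back, cfg);                        // PartialEq is derived on the config
    Ok(())
}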
pub mod log; pub mod state; pub mod state_machine; - -pub mod applied_state; diff --git a/src/meta/raft-store/src/state_machine/client_last_resp.rs b/src/meta/raft-store/src/state_machine/client_last_resp.rs index f0e743a216c36..5a331e5e1dba4 100644 --- a/src/meta/raft-store/src/state_machine/client_last_resp.rs +++ b/src/meta/raft-store/src/state_machine/client_last_resp.rs @@ -12,11 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_meta_types::AppliedState; use serde::Deserialize; use serde::Serialize; -use crate::applied_state::AppliedState; - /// Client last response that is stored in SledTree /// raft state: A mapping of client serial IDs to their state info: /// (serial, RaftResponse) diff --git a/src/meta/raft-store/src/state_machine/sm.rs b/src/meta/raft-store/src/state_machine/sm.rs index a3b7cc0940ccd..621626d44d4b2 100644 --- a/src/meta/raft-store/src/state_machine/sm.rs +++ b/src/meta/raft-store/src/state_machine/sm.rs @@ -34,6 +34,7 @@ use common_meta_types::protobuf as pb; use common_meta_types::txn_condition; use common_meta_types::txn_op; use common_meta_types::txn_op_response; +use common_meta_types::AppliedState; use common_meta_types::Change; use common_meta_types::Cmd; use common_meta_types::ConditionResult; @@ -69,7 +70,6 @@ use tracing::debug; use tracing::error; use tracing::info; -use crate::applied_state::AppliedState; use crate::config::RaftConfig; use crate::key_spaces::ClientLastResps; use crate::key_spaces::Expire; diff --git a/src/meta/raft-store/src/state_machine/sm_kv_api_impl.rs b/src/meta/raft-store/src/state_machine/sm_kv_api_impl.rs index 07b69fc3fee99..92d0cbefea034 100644 --- a/src/meta/raft-store/src/state_machine/sm_kv_api_impl.rs +++ b/src/meta/raft-store/src/state_machine/sm_kv_api_impl.rs @@ -17,6 +17,7 @@ use common_meta_kvapi::kvapi::GetKVReply; use common_meta_kvapi::kvapi::MGetKVReply; use common_meta_kvapi::kvapi::UpsertKVReply; use common_meta_kvapi::kvapi::UpsertKVReq; +use common_meta_types::AppliedState; use common_meta_types::Cmd; use common_meta_types::MetaError; use common_meta_types::SeqV; @@ -25,7 +26,6 @@ use common_meta_types::TxnRequest; use common_meta_types::UpsertKV; use tracing::debug; -use crate::applied_state::AppliedState; use crate::state_machine::StateMachine; #[async_trait::async_trait] diff --git a/src/meta/raft-store/tests/it/state_machine/mod.rs b/src/meta/raft-store/tests/it/state_machine/mod.rs index 3508543842af4..ad19caececdc7 100644 --- a/src/meta/raft-store/tests/it/state_machine/mod.rs +++ b/src/meta/raft-store/tests/it/state_machine/mod.rs @@ -17,9 +17,9 @@ use std::time::UNIX_EPOCH; use common_base::base::tokio; use common_meta_kvapi::kvapi::KVApi; -use common_meta_raft_store::applied_state::AppliedState; use common_meta_raft_store::state_machine::StateMachine; use common_meta_sled_store::openraft; +use common_meta_types::AppliedState; use common_meta_types::Change; use common_meta_types::Cmd; use common_meta_types::Endpoint; diff --git a/src/meta/service/src/message.rs b/src/meta/service/src/message.rs index 3bdecfa0ec7a1..db551c3087dc1 100644 --- a/src/meta/service/src/message.rs +++ b/src/meta/service/src/message.rs @@ -18,9 +18,9 @@ use common_meta_kvapi::kvapi::ListKVReply; use common_meta_kvapi::kvapi::ListKVReq; use common_meta_kvapi::kvapi::MGetKVReply; use common_meta_kvapi::kvapi::MGetKVReq; -use common_meta_raft_store::applied_state::AppliedState; use common_meta_sled_store::openraft::NodeId; use 
common_meta_types::protobuf::RaftRequest; +use common_meta_types::AppliedState; use common_meta_types::Endpoint; use common_meta_types::LogEntry; diff --git a/src/meta/service/src/meta_service/meta_leader.rs b/src/meta/service/src/meta_service/meta_leader.rs index 1a6a2c3aea218..c95310905b48a 100644 --- a/src/meta/service/src/meta_service/meta_leader.rs +++ b/src/meta/service/src/meta_service/meta_leader.rs @@ -16,10 +16,10 @@ use std::sync::Arc; use common_base::base::tokio::sync::RwLockReadGuard; use common_meta_kvapi::kvapi::KVApi; -use common_meta_raft_store::applied_state::AppliedState; use common_meta_raft_store::state_machine::StateMachine; use common_meta_sled_store::openraft::error::RemoveLearnerError; use common_meta_stoerr::MetaStorageError; +use common_meta_types::AppliedState; use common_meta_types::Cmd; use common_meta_types::LogEntry; use common_meta_types::MetaDataError; diff --git a/src/meta/service/src/meta_service/meta_node_kv_api_impl.rs b/src/meta/service/src/meta_service/meta_node_kv_api_impl.rs index a0cf2d5d39ab6..b9d9953c8aa22 100644 --- a/src/meta/service/src/meta_service/meta_node_kv_api_impl.rs +++ b/src/meta/service/src/meta_service/meta_node_kv_api_impl.rs @@ -22,7 +22,7 @@ use common_meta_kvapi::kvapi::MGetKVReply; use common_meta_kvapi::kvapi::MGetKVReq; use common_meta_kvapi::kvapi::UpsertKVReply; use common_meta_kvapi::kvapi::UpsertKVReq; -use common_meta_raft_store::applied_state::AppliedState; +use common_meta_types::AppliedState; use common_meta_types::Cmd; use common_meta_types::LogEntry; use common_meta_types::MetaAPIError; diff --git a/src/meta/service/src/meta_service/raftmeta.rs b/src/meta/service/src/meta_service/raftmeta.rs index 08403be13d732..ae9cb42cc273d 100644 --- a/src/meta/service/src/meta_service/raftmeta.rs +++ b/src/meta/service/src/meta_service/raftmeta.rs @@ -29,7 +29,6 @@ use common_base::base::tokio::time::Instant; use common_grpc::ConnectionFactory; use common_grpc::DNSResolver; use common_meta_client::reply_to_api_result; -use common_meta_raft_store::applied_state::AppliedState; use common_meta_raft_store::config::RaftConfig; use common_meta_raft_store::key_spaces::GenericKV; use common_meta_sled_store::openraft; @@ -41,6 +40,7 @@ use common_meta_stoerr::MetaStorageError; use common_meta_types::protobuf::raft_service_client::RaftServiceClient; use common_meta_types::protobuf::raft_service_server::RaftServiceServer; use common_meta_types::protobuf::WatchRequest; +use common_meta_types::AppliedState; use common_meta_types::Cmd; use common_meta_types::ConnectionError; use common_meta_types::Endpoint; diff --git a/src/meta/service/src/store/mod.rs b/src/meta/service/src/store/mod.rs index 62e9210b68afb..ce55c66d03bfe 100644 --- a/src/meta/service/src/store/mod.rs +++ b/src/meta/service/src/store/mod.rs @@ -15,8 +15,8 @@ mod store_bare; mod to_storage_error; -use common_meta_raft_store::applied_state::AppliedState; use common_meta_sled_store::openraft::StoreExt; +use common_meta_types::AppliedState; use common_meta_types::LogEntry; pub use store_bare::RaftStoreBare; pub use to_storage_error::ToStorageError; diff --git a/src/meta/service/src/store/store_bare.rs b/src/meta/service/src/store/store_bare.rs index da6bca334b693..3db329aec7cf4 100644 --- a/src/meta/service/src/store/store_bare.rs +++ b/src/meta/service/src/store/store_bare.rs @@ -20,7 +20,6 @@ use std::ops::RangeBounds; use anyerror::AnyError; use common_base::base::tokio::sync::RwLock; use common_base::base::tokio::sync::RwLockWriteGuard; -use 
common_meta_raft_store::applied_state::AppliedState; use common_meta_raft_store::config::RaftConfig; use common_meta_raft_store::log::RaftLog; use common_meta_raft_store::state::RaftState; @@ -36,6 +35,7 @@ use common_meta_sled_store::openraft::ErrorVerb; use common_meta_sled_store::openraft::Membership; use common_meta_sled_store::openraft::StateMachineChanges; use common_meta_stoerr::MetaStorageError; +use common_meta_types::AppliedState; use common_meta_types::Endpoint; use common_meta_types::LogEntry; use common_meta_types::MetaError; diff --git a/src/meta/service/tests/it/store.rs b/src/meta/service/tests/it/store.rs index 570e3149b54e1..2512e21199dda 100644 --- a/src/meta/service/tests/it/store.rs +++ b/src/meta/service/tests/it/store.rs @@ -16,7 +16,6 @@ use std::sync::Arc; use std::sync::Mutex; use common_base::base::tokio; -use common_meta_raft_store::applied_state::AppliedState; use common_meta_raft_store::state_machine::testing::pretty_snapshot; use common_meta_raft_store::state_machine::testing::snapshot_logs; use common_meta_raft_store::state_machine::SerializableSnapshot; @@ -30,6 +29,7 @@ use common_meta_sled_store::openraft::LogId; use common_meta_sled_store::openraft::Membership; use common_meta_sled_store::openraft::RaftStorage; use common_meta_sled_store::openraft::StorageHelper; +use common_meta_types::AppliedState; use common_meta_types::LogEntry; use databend_meta::init_meta_ut; use databend_meta::store::RaftStoreBare; diff --git a/src/meta/service/tests/it/tests/meta_node.rs b/src/meta/service/tests/it/tests/meta_node.rs index b6b5730b5e202..268a300ef866e 100644 --- a/src/meta/service/tests/it/tests/meta_node.rs +++ b/src/meta/service/tests/it/tests/meta_node.rs @@ -18,9 +18,9 @@ use std::collections::BTreeSet; use std::sync::Arc; use std::time::Duration; -use common_meta_raft_store::applied_state::AppliedState; use common_meta_sled_store::openraft::NodeId; use common_meta_sled_store::openraft::State; +use common_meta_types::AppliedState; use common_meta_types::Node; use databend_meta::meta_service::MetaNode; use databend_meta::Opened; diff --git a/src/meta/raft-store/src/applied_state.rs b/src/meta/types/src/applied_state.rs similarity index 96% rename from src/meta/raft-store/src/applied_state.rs rename to src/meta/types/src/applied_state.rs index cf39157ea83f3..c136134d78bfd 100644 --- a/src/meta/raft-store/src/applied_state.rs +++ b/src/meta/types/src/applied_state.rs @@ -15,10 +15,10 @@ use std::fmt; use std::fmt::Formatter; -use common_meta_types::protobuf::RaftReply; -use common_meta_types::Change; -use common_meta_types::Node; -use common_meta_types::TxnReply; +use crate::protobuf::RaftReply; +use crate::Change; +use crate::Node; +use crate::TxnReply; /// The state of an applied raft log. /// Normally it includes two fields: the state before applying and the state after applying the log. diff --git a/src/meta/types/src/lib.rs b/src/meta/types/src/lib.rs index bc940ae42ec7a..45a9b1ff66148 100644 --- a/src/meta/types/src/lib.rs +++ b/src/meta/types/src/lib.rs @@ -18,6 +18,7 @@ //! This crate defines data types used in meta data storage service. 
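Most of the hunks above are mechanical fallout of a single move: `AppliedState` now lives in the common-meta-types crate (and is re-exported from its root, per the lib.rs change below) instead of common-meta-raft-store. The import change every caller makes is simply:

// before this patch
// use common_meta_raft_store::applied_state::AppliedState;
// after this patch
use common_meta_types::AppliedState;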
+mod applied_state; mod change; mod cluster; mod cmd; @@ -48,6 +49,7 @@ pub mod protobuf { pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("meta_descriptor"); } +pub use applied_state::AppliedState; pub use change::Change; pub use cluster::Node; pub use cluster::NodeInfo; diff --git a/src/query/ast/src/ast/format/syntax/query.rs b/src/query/ast/src/ast/format/syntax/query.rs index 2f597efa19828..e4bbec89bd2ce 100644 --- a/src/query/ast/src/ast/format/syntax/query.rs +++ b/src/query/ast/src/ast/format/syntax/query.rs @@ -334,19 +334,7 @@ pub(crate) fn pretty_table(table: TableReference) -> RcDoc<'static> { options, alias, } => RcDoc::text(location.to_string()) - .append(if let Some(files) = options.files { - let files = files.join(","); - let files = format!("FILES {}", files); - RcDoc::text(files) - } else { - RcDoc::nil() - }) - .append(if let Some(pattern) = options.pattern { - let pattern = format!("Pattern {pattern}"); - RcDoc::text(pattern) - } else { - RcDoc::nil() - }) + .append(options.to_string()) .append(if let Some(a) = alias { RcDoc::text(format!(" AS {a}")) } else { diff --git a/src/query/ast/src/ast/query.rs b/src/query/ast/src/ast/query.rs index ce0c6ad562802..f5c56fdae7a30 100644 --- a/src/query/ast/src/ast/query.rs +++ b/src/query/ast/src/ast/query.rs @@ -372,13 +372,7 @@ impl Display for TableReference { alias, } => { write!(f, "{location}")?; - if let Some(files) = &options.files { - let files = files.join(","); - write!(f, " FILES {files}")?; - } - if let Some(pattern) = &options.pattern { - write!(f, " PATTERN {pattern}")?; - } + write!(f, "{options}")?; if let Some(alias) = alias { write!(f, " AS {alias}")?; } diff --git a/src/query/ast/src/ast/statements/copy.rs b/src/query/ast/src/ast/statements/copy.rs index 5c8e7573e9871..2da4e32ed97ee 100644 --- a/src/query/ast/src/ast/statements/copy.rs +++ b/src/query/ast/src/ast/statements/copy.rs @@ -348,7 +348,7 @@ impl Display for FileLocation { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { FileLocation::Uri(loc) => { - write!(f, "{}", loc) + write!(f, "'{}'", loc) } FileLocation::Stage(loc) => { write!(f, "{}", loc) diff --git a/src/query/ast/src/ast/statements/stage.rs b/src/query/ast/src/ast/statements/stage.rs index d0e347c65f524..b425b475d2ac5 100644 --- a/src/query/ast/src/ast/statements/stage.rs +++ b/src/query/ast/src/ast/statements/stage.rs @@ -90,6 +90,58 @@ pub struct SelectStageOptions { pub connection: BTreeMap, } +// SELECT FROM +// {@[/] | ''} [( +// [ PARTTERN => ''] +// [ FILE_FORMAT => ''] +// [ FILES => ( 'file_name' [ , 'file_name' ... 
] ) ] +// [ ENDPOINT_URL => <'url'> ] +// [ AWS_KEY_ID => <'aws_key_id'> ] +// [ AWS_KEY_SECRET => <'aws_key_secret'> ] +// [ ACCESS_KEY_ID => <'access_key_id'> ] +// [ ACCESS_KEY_SECRET => <'access_key_secret'> ] +// [ SECRET_ACCESS_KEY => <'secret_access_key'> ] +// [ SESSION_TOKEN => <'session_token'> ] +// [ REGION => <'region'> ] +// [ ENABLE_VIRTUAL_HOST_STYLE => true|false ] +// )] +impl Display for SelectStageOptions { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, " (")?; + + let mut output: Vec = vec![]; + if let Some(files) = self.files.clone() { + let files = files + .iter() + .map(|x| format!("'{}'", x)) + .collect::>(); + let files = files.join(","); + let files = format!("FILES => ({})", files); + output.push(files); + } + + if let Some(file_format) = self.file_format.clone() { + let file_format = format!("FILE_FORMAT => '{}'", file_format); + output.push(file_format); + } + + if let Some(pattern) = self.pattern.clone() { + let pattern = format!("PATTERN => '{}'", pattern); + output.push(pattern); + } + + if !self.connection.is_empty() { + for (k, v) in self.connection.iter() { + output.push(format!(" {} => '{}'", k, v)); + } + } + + let output = output.join(","); + write!(f, "{output})")?; + Ok(()) + } +} + impl SelectStageOptions { pub fn from(opts: Vec) -> Self { let mut options: SelectStageOptions = Default::default(); diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index edc2157e7901d..7caab997bba91 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -326,8 +326,8 @@ impl<'a, I: Iterator>> PrattParser for ExprP ExprElement::UnaryOp { op } => match op { UnaryOperator::Not => Affix::Prefix(Precedence(NOT_PREC)), - UnaryOperator::Plus => Affix::Prefix(Precedence(30)), - UnaryOperator::Minus => Affix::Prefix(Precedence(30)), + UnaryOperator::Plus => Affix::Prefix(Precedence(50)), + UnaryOperator::Minus => Affix::Prefix(Precedence(50)), }, ExprElement::BinaryOp { op } => match op { BinaryOperator::Or => Affix::Infix(Precedence(5), Associativity::Left), @@ -362,7 +362,7 @@ impl<'a, I: Iterator>> PrattParser for ExprP BinaryOperator::Modulo => Affix::Infix(Precedence(40), Associativity::Left), BinaryOperator::StringConcat => Affix::Infix(Precedence(40), Associativity::Left), }, - ExprElement::PgCast { .. } => Affix::Postfix(Precedence(50)), + ExprElement::PgCast { .. 
} => Affix::Postfix(Precedence(60)), _ => Affix::Nilfix, }; Ok(affix) diff --git a/src/query/ast/src/parser/stage.rs b/src/query/ast/src/parser/stage.rs index 51e29a93060a0..c21eae3dd9107 100644 --- a/src/query/ast/src/parser/stage.rs +++ b/src/query/ast/src/parser/stage.rs @@ -55,6 +55,8 @@ fn connection_opt(sep: &'static str) -> impl FnMut(Input) -> IResult<(String, St | SECRET_ACCESS_KEY | SESSION_TOKEN | REGION + | HTTPS + | DELEGATION | ENABLE_VIRTUAL_HOST_STYLE) ~ #sep1 ~ #literal_string }, diff --git a/src/query/ast/src/parser/token.rs b/src/query/ast/src/parser/token.rs index 478963b42bb03..eaa20bcf2e63c 100644 --- a/src/query/ast/src/parser/token.rs +++ b/src/query/ast/src/parser/token.rs @@ -363,6 +363,8 @@ pub enum TokenKind { DEFAULT, #[token("DEFLATE", ignore(ascii_case))] DEFLATE, + #[token("DELEGATION", ignore(ascii_case))] + DELEGATION, // delegation token, used in webhdfs #[token("DELETE", ignore(ascii_case))] DELETE, #[token("DESC", ignore(ascii_case))] @@ -475,6 +477,8 @@ pub enum TokenKind { HIVE, #[token("HOUR", ignore(ascii_case))] HOUR, + #[token("HTTPS", ignore(ascii_case))] + HTTPS, #[token("ICEBERG", ignore(ascii_case))] ICEBERG, #[token("INTERSECT", ignore(ascii_case))] diff --git a/src/query/ast/tests/it/testdata/expr.txt b/src/query/ast/tests/it/testdata/expr.txt index f62369076704c..aa2d3a0a23946 100644 --- a/src/query/ast/tests/it/testdata/expr.txt +++ b/src/query/ast/tests/it/testdata/expr.txt @@ -730,39 +730,39 @@ FunctionCall { ---------- Input ---------- - - + + - 1 + + - 2 ---------- Output --------- -(- (- (+ (+ (- (1 + (+ (- 2)))))))) +((- (- (+ (+ (- 1))))) + (+ (- 2))) ---------- AST ------------ -UnaryOp { +BinaryOp { span: Some( - 0..1, + 12..13, ), - op: Minus, - expr: UnaryOp { + op: Plus, + left: UnaryOp { span: Some( - 2..3, + 0..1, ), op: Minus, expr: UnaryOp { span: Some( - 4..5, + 2..3, ), - op: Plus, + op: Minus, expr: UnaryOp { span: Some( - 6..7, + 4..5, ), op: Plus, expr: UnaryOp { span: Some( - 8..9, + 6..7, ), - op: Minus, - expr: BinaryOp { + op: Plus, + expr: UnaryOp { span: Some( - 12..13, + 8..9, ), - op: Plus, - left: Literal { + op: Minus, + expr: Literal { span: Some( 10..11, ), @@ -770,31 +770,31 @@ UnaryOp { 1, ), }, - right: UnaryOp { - span: Some( - 14..15, - ), - op: Plus, - expr: UnaryOp { - span: Some( - 16..17, - ), - op: Minus, - expr: Literal { - span: Some( - 18..19, - ), - lit: Integer( - 2, - ), - }, - }, - }, }, }, }, }, }, + right: UnaryOp { + span: Some( + 14..15, + ), + op: Plus, + expr: UnaryOp { + span: Some( + 16..17, + ), + op: Minus, + expr: Literal { + span: Some( + 18..19, + ), + lit: Integer( + 2, + ), + }, + }, + }, } diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index 8c5dbcbcb8796..832c4303b559e 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -262,6 +262,10 @@ impl From for StorageConfig { storage_num_cpus: inner.num_cpus, storage_type: "".to_string(), allow_insecure: inner.allow_insecure, + // use default for each config instead of using `..Default::default` + // using `..Default::default` is calling `Self::default` + // and `Self::default` relies on `InnerStorage::into()` + // this will lead to a stack overflow fs: Default::default(), gcs: Default::default(), s3: Default::default(), diff --git a/src/query/expression/Cargo.toml b/src/query/expression/Cargo.toml index e1c982533cd55..439c68f34ab1c 100755 --- a/src/query/expression/Cargo.toml +++ b/src/query/expression/Cargo.toml @@ -39,7 +39,6 @@ micromarshal = "0.2.1" num-traits = 
"0.2.15" once_cell = "1.15.0" ordered-float = { workspace = true, features = ["serde", "rand"] } -primitive-types = "0.12.0" rand = { version = "0.8.5", features = ["small_rng"] } rust_decimal = "1.26" serde = { workspace = true } diff --git a/src/query/expression/src/block.rs b/src/query/expression/src/block.rs index c231fc3592c7f..84980f7be322f 100644 --- a/src/query/expression/src/block.rs +++ b/src/query/expression/src/block.rs @@ -53,9 +53,8 @@ pub struct BlockEntry { } #[typetag::serde(tag = "type")] -pub trait BlockMetaInfo: Debug + Send + Sync { +pub trait BlockMetaInfo: Debug + Send + Sync + 'static { fn as_any(&self) -> &dyn Any; - fn as_mut_any(&mut self) -> &mut dyn Any; #[allow(clippy::borrowed_box)] fn equals(&self, info: &Box) -> bool; @@ -63,6 +62,38 @@ pub trait BlockMetaInfo: Debug + Send + Sync { fn clone_self(&self) -> Box; } +pub trait BlockMetaInfoDowncast: Sized { + fn downcast_from(boxed: BlockMetaInfoPtr) -> Option; + + fn downcast_ref_from(boxed: &BlockMetaInfoPtr) -> Option<&Self>; +} + +impl BlockMetaInfoDowncast for T { + fn downcast_from(boxed: BlockMetaInfoPtr) -> Option { + if boxed.as_any().is::() { + unsafe { + // SAFETY: `is` ensures this type cast is correct + let raw_ptr = Box::into_raw(boxed) as *const dyn BlockMetaInfo; + return Some(std::ptr::read(raw_ptr as *const Self)); + } + } + + None + } + + fn downcast_ref_from(boxed: &BlockMetaInfoPtr) -> Option<&Self> { + if boxed.as_any().is::() { + unsafe { + // SAFETY: `is` ensures this type cast is correct + let unboxed = boxed.as_ref(); + return Some(&*(unboxed as *const dyn BlockMetaInfo as *const Self)); + } + } + + None + } +} + impl DataBlock { #[inline] pub fn new(columns: Vec, num_rows: usize) -> Self { diff --git a/src/query/expression/src/deserializations/decimal.rs b/src/query/expression/src/deserializations/decimal.rs index ff321a8a7f3ed..1d93346e474a0 100644 --- a/src/query/expression/src/deserializations/decimal.rs +++ b/src/query/expression/src/deserializations/decimal.rs @@ -85,8 +85,10 @@ impl TypeDeserializer for DecimalDeserializer { // See GroupHash.rs for StringColumn #[allow(clippy::uninit_vec)] - fn de_binary(&mut self, _reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { - todo!() + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { + let t: T = T::de_binary(reader); + self.values.push(t); + Ok(()) } fn de_default(&mut self) { @@ -95,12 +97,17 @@ impl TypeDeserializer for DecimalDeserializer { fn de_fixed_binary_batch( &mut self, - _reader: &[u8], - _step: usize, - _rows: usize, + reader: &[u8], + step: usize, + rows: usize, _format: &FormatSettings, ) -> Result<()> { - todo!() + for row in 0..rows { + let mut row_reader = &reader[step * row..]; + let value: T = T::de_binary(&mut row_reader); + self.values.push(value); + } + Ok(()) } fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { diff --git a/src/query/expression/src/evaluator.rs b/src/query/expression/src/evaluator.rs index e97dcf5366d34..25cd924b2becd 100644 --- a/src/query/expression/src/evaluator.rs +++ b/src/query/expression/src/evaluator.rs @@ -13,8 +13,6 @@ // limitations under the License. 
use std::collections::HashMap; -#[cfg(debug_assertions)] -use std::sync::Mutex; use common_arrow::arrow::bitmap; use common_exception::ErrorCode; @@ -168,30 +166,31 @@ impl<'a> Evaluator<'a> { } }; - #[cfg(debug_assertions)] - if result.is_err() { - static RECURSING: Mutex = Mutex::new(false); - if !*RECURSING.lock().unwrap() { - *RECURSING.lock().unwrap() = true; - assert_eq!( - ConstantFolder::fold_with_domain( - expr, - self.input_columns - .domains() - .into_iter() - .enumerate() - .collect(), - self.func_ctx, - self.fn_registry - ) - .1, - None, - "domain calculation should not return any domain for expressions that are possible to fail" - ); - *RECURSING.lock().unwrap() = false; - } - } - + // We can't call this in debug mode, because it will cause infinite recursion. + // Eg: select 3.2::Decimal(10, 2)::Int32; + // #[cfg(debug_assertions)] + // if result.is_err() { + // static RECURSING: Mutex = Mutex::new(false); + // if !*RECURSING.lock().unwrap() { + // *RECURSING.lock().unwrap() = true; + // assert_eq!( + // ConstantFolder::fold_with_domain( + // expr, + // self.input_columns + // .domains() + // .into_iter() + // .enumerate() + // .collect(), + // self.func_ctx, + // self.fn_registry + // ) + // .1, + // None, + // "domain calculation should not return any domain for expressions that are possible to fail" + // ); + // *RECURSING.lock().unwrap() = false; + // } + // } result } diff --git a/src/query/expression/src/kernels/group_by.rs b/src/query/expression/src/kernels/group_by.rs index 1f6ef3ce9ec1c..cab915bc66306 100644 --- a/src/query/expression/src/kernels/group_by.rs +++ b/src/query/expression/src/kernels/group_by.rs @@ -25,7 +25,6 @@ use crate::types::DataType; use crate::DataBlock; use crate::HashMethodKeysU128; use crate::HashMethodKeysU256; -use crate::HashMethodKeysU512; impl DataBlock { pub fn choose_hash_method(chunk: &DataBlock, indices: &[usize]) -> Result { @@ -55,7 +54,10 @@ impl DataBlock { for typ in hash_key_types { let not_null_type = typ.remove_nullable(); - if not_null_type.is_numeric() || not_null_type.is_date_or_date_time() { + if not_null_type.is_numeric() + || not_null_type.is_date_or_date_time() + || not_null_type.is_decimal() + { group_key_len += not_null_type.numeric_byte_size().unwrap(); // extra one byte for null flag @@ -74,7 +76,6 @@ impl DataBlock { 5..=8 => Ok(HashMethodKind::KeysU64(HashMethodKeysU64::default())), 9..=16 => Ok(HashMethodKind::KeysU128(HashMethodKeysU128::default())), 17..=32 => Ok(HashMethodKind::KeysU256(HashMethodKeysU256::default())), - 33..=64 => Ok(HashMethodKind::KeysU512(HashMethodKeysU512::default())), _ => Ok(HashMethodKind::Serializer(HashMethodSerializer::default())), } } diff --git a/src/query/expression/src/kernels/group_by_hash.rs b/src/query/expression/src/kernels/group_by_hash.rs index 26c7d08cf801b..6352acbec28ae 100644 --- a/src/query/expression/src/kernels/group_by_hash.rs +++ b/src/query/expression/src/kernels/group_by_hash.rs @@ -24,20 +24,25 @@ use common_exception::Result; use common_hashtable::FastHash; use common_io::prelude::BinaryWrite; use common_io::prelude::FormatSettings; +use ethnum::i256; +use ethnum::u256; +use ethnum::U256; use micromarshal::Marshal; -use primitive_types::U256; -use primitive_types::U512; use crate::types::boolean::BooleanType; +use crate::types::decimal::Decimal; +use crate::types::decimal::DecimalColumn; use crate::types::nullable::NullableColumn; use crate::types::number::Number; use crate::types::number::NumberColumn; use crate::types::string::StringColumnBuilder; use 
crate::types::string::StringIterator; use crate::types::DataType; +use crate::types::DecimalDataType; use crate::types::NumberDataType; use crate::types::NumberType; use crate::types::ValueType; +use crate::with_decimal_mapped_type; use crate::with_integer_mapped_type; use crate::with_number_mapped_type; use crate::Column; @@ -46,9 +51,8 @@ use crate::TypeDeserializer; #[derive(Debug)] pub enum KeysState { Column(Column), - U128(Vec), - U256(Vec), - U512(Vec), + U128(Buffer), + U256(Buffer), } pub trait HashMethod: Clone + Sync + Send + 'static { @@ -73,8 +77,7 @@ pub type HashMethodKeysU16 = HashMethodFixedKeys; pub type HashMethodKeysU32 = HashMethodFixedKeys; pub type HashMethodKeysU64 = HashMethodFixedKeys; pub type HashMethodKeysU128 = HashMethodFixedKeys; -pub type HashMethodKeysU256 = HashMethodFixedKeys; -pub type HashMethodKeysU512 = HashMethodFixedKeys; +pub type HashMethodKeysU256 = HashMethodFixedKeys; /// These methods are `generic` method to generate hash key, /// that is the 'numeric' or 'binary` representation of each column value as hash key. @@ -88,7 +91,6 @@ pub enum HashMethodKind { KeysU64(HashMethodKeysU64), KeysU128(HashMethodKeysU128), KeysU256(HashMethodKeysU256), - KeysU512(HashMethodKeysU512), } #[macro_export] @@ -96,7 +98,7 @@ macro_rules! with_hash_method { ( | $t:tt | $($tail:tt)* ) => { match_template::match_template! { $t = [Serializer, SingleString, KeysU8, KeysU16, - KeysU32, KeysU64, KeysU128, KeysU256, KeysU512], + KeysU32, KeysU64, KeysU128, KeysU256], $($tail)* } } @@ -115,7 +117,6 @@ macro_rules! with_mappedhash_method { KeysU64 => HashMethodKeysU64, KeysU128 => HashMethodKeysU128, KeysU256 => HashMethodKeysU256, - KeysU512 => HashMethodKeysU512 ], $($tail)* } @@ -137,9 +138,12 @@ impl HashMethodKind { HashMethodKind::KeysU16(_) => DataType::Number(NumberDataType::UInt16), HashMethodKind::KeysU32(_) => DataType::Number(NumberDataType::UInt32), HashMethodKind::KeysU64(_) => DataType::Number(NumberDataType::UInt64), - HashMethodKind::KeysU128(_) - | HashMethodKind::KeysU256(_) - | HashMethodKind::KeysU512(_) => DataType::String, + HashMethodKind::KeysU128(_) => { + DataType::Decimal(DecimalDataType::Decimal128(i128::default_decimal_size())) + } + HashMethodKind::KeysU256(_) => { + DataType::Decimal(DecimalDataType::Decimal256(i256::default_decimal_size())) + } } } } @@ -275,7 +279,7 @@ where T: Clone debug_assert!(!keys.is_empty()); // faster path for single signed/unsigned integer to column - if group_items.len() == 1 && group_items[0].1.is_numeric() { + if group_items.len() == 1 { if let DataType::Number(ty) = group_items[0].1 { with_integer_mapped_type!(|NUM_TYPE| match ty { NumberDataType::NUM_TYPE => { @@ -287,6 +291,19 @@ where T: Clone _ => {} }) } + + if matches!(group_items[0].1, DataType::Decimal(_)) { + with_decimal_mapped_type!(|DECIMAL_TYPE| match group_items[0].1 { + DataType::Decimal(DecimalDataType::DECIMAL_TYPE(size)) => { + let buffer: Buffer = keys.into(); + let col = unsafe { + std::mem::transmute::, Buffer>(buffer) + }; + return Ok(vec![DECIMAL_TYPE::upcast_column(col, size)]); + } + _ => {} + }) + } } let mut keys = keys; @@ -444,8 +461,25 @@ macro_rules! 
impl_hash_method_fixed_large_keys { group_columns: &[(Column, DataType)], rows: usize, ) -> Result { + // faster path for single fixed decimal keys + if group_columns.len() == 1 { + if group_columns[0].1.is_decimal() { + with_decimal_mapped_type!(|DECIMAL_TYPE| match &group_columns[0].0 { + Column::Decimal(DecimalColumn::DECIMAL_TYPE(c, _)) => { + let buffer = unsafe { + std::mem::transmute::, Buffer<$ty>>( + c.clone(), + ) + }; + return Ok(KeysState::$name(buffer)); + } + _ => {} + }) + } + } + let keys = self.build_keys_vec(group_columns, rows)?; - Ok(KeysState::$name(keys)) + Ok(KeysState::$name(keys.into())) } fn build_keys_iter<'a>( @@ -463,7 +497,6 @@ macro_rules! impl_hash_method_fixed_large_keys { impl_hash_method_fixed_large_keys! {u128, U128} impl_hash_method_fixed_large_keys! {U256, U256} -impl_hash_method_fixed_large_keys! {U512, U512} #[inline] fn build( @@ -502,7 +535,13 @@ pub fn serialize_column_binary(column: &Column, row: usize, vec: &mut Vec) { Column::String(v) => { BinaryWrite::write_binary(vec, unsafe { v.index_unchecked(row) }).unwrap() } - Column::Decimal(_) => unreachable!("Decimal is not supported in group by keys format"), + Column::Decimal(_) => { + with_decimal_mapped_type!(|DECIMAL_TYPE| match column { + Column::Decimal(DecimalColumn::DECIMAL_TYPE(v, _)) => + vec.extend_from_slice(v[row].to_le_bytes().as_ref()), + _ => unreachable!(), + }) + } Column::Timestamp(v) => vec.extend_from_slice(v[row].to_le_bytes().as_ref()), Column::Date(v) => vec.extend_from_slice(v[row].to_le_bytes().as_ref()), Column::Array(array) | Column::Map(array) => { diff --git a/src/query/expression/src/property.rs b/src/query/expression/src/property.rs index 1d9587446bcd8..bdc47e5395021 100644 --- a/src/query/expression/src/property.rs +++ b/src/query/expression/src/property.rs @@ -34,6 +34,7 @@ use crate::types::NumberType; use crate::types::StringType; use crate::types::TimestampType; use crate::types::ValueType; +use crate::with_decimal_type; use crate::with_number_type; use crate::Scalar; @@ -193,6 +194,19 @@ impl Domain { _ => unreachable!("unable to merge {this:?} with {other:?}"), }) } + (Domain::Decimal(this), Domain::Decimal(other)) => { + with_decimal_type!(|TYPE| match (this, other) { + (DecimalDomain::TYPE(x, size), DecimalDomain::TYPE(y, _)) => + Domain::Decimal(DecimalDomain::TYPE( + SimpleDomain { + min: x.min.min(y.min), + max: x.max.max(y.max), + }, + *size + ),), + _ => unreachable!("unable to merge {this:?} with {other:?}"), + }) + } (Domain::Boolean(this), Domain::Boolean(other)) => Domain::Boolean(BooleanDomain { has_false: this.has_false || other.has_false, has_true: this.has_true || other.has_true, diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index deff57be834e1..0b0582740da0f 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -25,9 +25,11 @@ use crate::expression::Literal; use crate::expression::RawExpr; use crate::function::FunctionRegistry; use crate::function::FunctionSignature; +use crate::types::decimal::DecimalSize; use crate::types::number::NumberDataType; use crate::types::number::NumberScalar; use crate::types::DataType; +use crate::types::DecimalDataType; use crate::AutoCastRules; use crate::ColumnIndex; use crate::Scalar; @@ -476,7 +478,8 @@ pub fn can_auto_cast_to( .zip(dest_tys) .all(|(src_ty, dest_ty)| can_auto_cast_to(src_ty, dest_ty, auto_cast_rules)) } - (DataType::Number(_) | DataType::Decimal(_), DataType::Decimal(_)) => true, + (DataType::Number(_), 
DataType::Decimal(_)) => true, + (DataType::Decimal(x), DataType::Decimal(y)) => x.precision() <= y.precision(), _ => false, } } @@ -515,11 +518,24 @@ pub fn common_super_type( .collect::>>()?; Some(DataType::Tuple(tys)) } - // todo!("decimal") - // ( - // DataType::Number(_) | DataType::Decimal(_), - // DataType::Number(_) | DataType::Decimal(_), - // ) => DataType::Decimal(?), + (DataType::Number(_), DataType::Decimal(ty)) + | (DataType::Decimal(ty), DataType::Number(_)) => { + let max_precision = ty.max_precision(); + let scale = ty.scale(); + + DecimalDataType::from_size(DecimalSize { + precision: max_precision, + scale, + }) + .ok() + .map(DataType::Decimal) + } + + (DataType::Decimal(a), DataType::Decimal(b)) => { + let ty = DecimalDataType::binary_result_type(&a, &b, false, false, true).ok(); + ty.map(DataType::Decimal) + } + (ty1, ty2) => { let ty1_can_cast_to = auto_cast_rules .iter() diff --git a/src/query/expression/src/types.rs b/src/query/expression/src/types.rs index 911ea81f47490..5b654afdf55ad 100755 --- a/src/query/expression/src/types.rs +++ b/src/query/expression/src/types.rs @@ -183,6 +183,9 @@ impl DataType { | DataType::Number(NumberDataType::UInt64) | DataType::Number(NumberDataType::Float64) | DataType::Number(NumberDataType::Int64) => Ok(8), + + DataType::Decimal(DecimalDataType::Decimal128(_)) => Ok(16), + DataType::Decimal(DecimalDataType::Decimal256(_)) => Ok(32), _ => Result::Err(format!( "Function number_byte_size argument must be numeric types, but got {:?}", self diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index 2d44c21e60b7d..dfe33a776082b 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -88,15 +88,15 @@ impl ValueType for DecimalType { } fn upcast_scalar(scalar: Self::Scalar) -> Scalar { - Num::upcast_scalar(scalar) + Num::upcast_scalar(scalar, Num::default_decimal_size()) } fn upcast_column(col: Self::Column) -> Column { - Num::upcast_column(col) + Num::upcast_column(col, Num::default_decimal_size()) } fn upcast_domain(domain: Self::Domain) -> Domain { - Num::upcast_domain(domain) + Num::upcast_domain(domain, Num::default_decimal_size()) } fn column_len<'a>(col: &'a Self::Column) -> usize { @@ -254,18 +254,22 @@ pub trait Decimal: fn min_for_precision(precision: u8) -> Self; fn max_for_precision(precision: u8) -> Self; + fn default_decimal_size() -> DecimalSize; + fn from_float(value: f64) -> Self; fn from_u64(value: u64) -> Self; fn from_i64(value: i64) -> Self; + fn de_binary(bytes: &mut &[u8]) -> Self; fn try_downcast_column(column: &Column) -> Option<(Buffer, DecimalSize)>; fn try_downcast_builder<'a>(builder: &'a mut ColumnBuilder) -> Option<&'a mut Vec>; fn try_downcast_scalar(scalar: &DecimalScalar) -> Option; fn try_downcast_domain(domain: &DecimalDomain) -> Option>; - fn upcast_scalar(scalar: Self) -> Scalar; - fn upcast_column(col: Buffer) -> Column; - fn upcast_domain(domain: SimpleDomain) -> Domain; + + fn upcast_scalar(scalar: Self, size: DecimalSize) -> Scalar; + fn upcast_column(col: Buffer, size: DecimalSize) -> Column; + fn upcast_domain(domain: SimpleDomain, size: DecimalSize) -> Domain; fn data_type() -> DataType; const MIN: Self; const MAX: Self; @@ -341,6 +345,17 @@ impl Decimal for i128 { 9_i128.saturating_pow(1 + to_precision as u32) } + fn default_decimal_size() -> DecimalSize { + DecimalSize { + precision: MAX_DECIMAL128_PRECISION, + scale: 0, + } + } + + fn to_column_from_buffer(value: Buffer, size: DecimalSize) -> 
DecimalColumn { + DecimalColumn::Decimal128(value, size) + } + fn from_float(value: f64) -> Self { value.to_i128().unwrap() } @@ -353,6 +368,14 @@ impl Decimal for i128 { value.to_i128().unwrap() } + fn de_binary(bytes: &mut &[u8]) -> Self { + let bs: [u8; std::mem::size_of::()] = + bytes[0..std::mem::size_of::()].try_into().unwrap(); + *bytes = &bytes[std::mem::size_of::()..]; + + i128::from_le_bytes(bs) + } + fn try_downcast_column(column: &Column) -> Option<(Buffer, DecimalSize)> { let column = column.as_decimal()?; match column { @@ -383,25 +406,16 @@ impl Decimal for i128 { } // will mock DecimalSize need modify when use it - fn upcast_scalar(scalar: Self) -> Scalar { - Scalar::Decimal(DecimalScalar::Decimal128(scalar, DecimalSize { - precision: MAX_DECIMAL128_PRECISION, - scale: 0, - })) + fn upcast_scalar(scalar: Self, size: DecimalSize) -> Scalar { + Scalar::Decimal(DecimalScalar::Decimal128(scalar, size)) } - fn upcast_column(col: Buffer) -> Column { - Column::Decimal(DecimalColumn::Decimal128(col, DecimalSize { - precision: MAX_DECIMAL128_PRECISION, - scale: 0, - })) + fn upcast_column(col: Buffer, size: DecimalSize) -> Column { + Column::Decimal(DecimalColumn::Decimal128(col, size)) } - fn upcast_domain(domain: SimpleDomain) -> Domain { - Domain::Decimal(DecimalDomain::Decimal128(domain, DecimalSize { - precision: MAX_DECIMAL128_PRECISION, - scale: 0, - })) + fn upcast_domain(domain: SimpleDomain, size: DecimalSize) -> Domain { + Domain::Decimal(DecimalDomain::Decimal128(domain, size)) } fn data_type() -> DataType { @@ -414,9 +428,6 @@ impl Decimal for i128 { const MIN: i128 = i128::MIN; const MAX: i128 = i128::MAX; - fn to_column_from_buffer(value: Buffer, size: DecimalSize) -> DecimalColumn { - DecimalColumn::Decimal128(value, size) - } } impl Decimal for i256 { @@ -470,6 +481,13 @@ impl Decimal for i256 { (i256::ONE * 9).saturating_pow(1 + to_precision as u32) } + fn default_decimal_size() -> DecimalSize { + DecimalSize { + precision: MAX_DECIMAL256_PRECISION, + scale: 0, + } + } + fn from_float(value: f64) -> Self { i256::from(value.to_i128().unwrap()) } @@ -482,6 +500,14 @@ impl Decimal for i256 { i256::from(value.to_i128().unwrap()) } + fn de_binary(bytes: &mut &[u8]) -> Self { + let bs: [u8; std::mem::size_of::()] = + bytes[0..std::mem::size_of::()].try_into().unwrap(); + *bytes = &bytes[std::mem::size_of::()..]; + + i256::from_le_bytes(bs) + } + fn try_downcast_column(column: &Column) -> Option<(Buffer, DecimalSize)> { let column = column.as_decimal()?; match column { @@ -511,29 +537,20 @@ impl Decimal for i256 { } } - fn upcast_scalar(scalar: Self) -> Scalar { - Scalar::Decimal(DecimalScalar::Decimal256(scalar, DecimalSize { - precision: MAX_DECIMAL256_PRECISION, - scale: 0, - })) + fn upcast_scalar(scalar: Self, size: DecimalSize) -> Scalar { + Scalar::Decimal(DecimalScalar::Decimal256(scalar, size)) } - fn upcast_column(col: Buffer) -> Column { - Column::Decimal(DecimalColumn::Decimal256(col, DecimalSize { - precision: MAX_DECIMAL256_PRECISION, - scale: 0, - })) + fn upcast_column(col: Buffer, size: DecimalSize) -> Column { + Column::Decimal(DecimalColumn::Decimal256(col, size)) } - fn upcast_domain(domain: SimpleDomain) -> Domain { - Domain::Decimal(DecimalDomain::Decimal256(domain, DecimalSize { - precision: MAX_DECIMAL256_PRECISION, - scale: 0, - })) + fn upcast_domain(domain: SimpleDomain, size: DecimalSize) -> Domain { + Domain::Decimal(DecimalDomain::Decimal256(domain, size)) } fn data_type() -> DataType { - 
DataType::Decimal(DecimalDataType::Decimal128(DecimalSize { + DataType::Decimal(DecimalDataType::Decimal256(DecimalSize { precision: MAX_DECIMAL256_PRECISION, scale: 0, })) @@ -628,9 +645,6 @@ impl DecimalDataType { let multiply_precision = a.precision() + b.precision(); let divide_precision = a.precision() + b.scale(); - // for addition/subtraction, we add 1 to the width to ensure we don't overflow - let plus_min_precision = a.leading_digits().max(b.leading_digits()) - scale + 1; - if is_multiply { scale = a.scale() + b.scale(); precision = precision.min(multiply_precision); @@ -639,9 +653,10 @@ impl DecimalDataType { precision = precision.min(divide_precision); } else if is_plus_minus { scale = std::cmp::max(a.scale(), b.scale()); + // for addition/subtraction, we add 1 to the width to ensure we don't overflow + let plus_min_precision = a.leading_digits().max(b.leading_digits()) + scale + 1; precision = precision.min(plus_min_precision); } - Self::from_size(DecimalSize { precision, scale }) } diff --git a/src/query/functions/src/scalars/decimal.rs b/src/query/functions/src/scalars/decimal.rs index 4bbf516ee81f7..7b7f1e726c085 100644 --- a/src/query/functions/src/scalars/decimal.rs +++ b/src/query/functions/src/scalars/decimal.rs @@ -240,7 +240,7 @@ macro_rules! register_decimal_compare_op { DataType::Decimal(return_type.clone()), DataType::Decimal(return_type.clone()), ], - return_type: DataType::Decimal(return_type.clone()), + return_type: DataType::Boolean, property: FunctionProperty::default(), }, calc_domain: Box::new(|_args_domain| FunctionDomain::Full), @@ -281,7 +281,7 @@ macro_rules! register_decimal_binary_op { let is_multiply = $name == "multiply"; let is_divide = $name == "divide"; - let is_plus_minus = $name == "plus" || $name == "minus"; + let is_plus_minus = !is_multiply && !is_divide; let return_type = if args_type[0].is_decimal() && args_type[1].is_decimal() { let lhs_type = args_type[0].as_decimal().unwrap(); @@ -396,6 +396,29 @@ pub fn register(registry: &mut FunctionRegistry) { }), })) }); + + // decimal to float + registry.register_function_factory("to_float64", |_params, args_type| { + if args_type.len() != 1 { + return None; + } + + let arg_type = args_type[0].clone(); + if !arg_type.is_decimal() { + return None; + } + + Some(Arc::new(Function { + signature: FunctionSignature { + name: "to_float64".to_string(), + args_type: vec![arg_type.clone()], + return_type: Float64Type::data_type(), + property: FunctionProperty::default(), + }, + calc_domain: Box::new(|_args_domain| FunctionDomain::Full), + eval: Box::new(move |args, tx| decimal_to_float64(args, arg_type.clone(), tx)), + })) + }); } fn convert_to_decimal( @@ -662,3 +685,53 @@ fn decimal_to_decimal( Value::Column(Column::Decimal(result)) } } + +fn decimal_to_float64( + args: &[ValueRef], + from_type: DataType, + _ctx: &mut EvalContext, +) -> Value { + let arg = &args[0]; + + let mut is_scalar = false; + let column = match arg { + ValueRef::Column(column) => column.clone(), + ValueRef::Scalar(s) => { + is_scalar = true; + let builder = ColumnBuilder::repeat(s, 1, &from_type); + builder.build() + } + }; + + let from_type = from_type.as_decimal().unwrap(); + + let result = match from_type { + DecimalDataType::Decimal128(_) => { + let (buffer, from_size) = i128::try_downcast_column(&column).unwrap(); + + let div = 10_f64.powi(from_size.scale as i32); + + let values: Buffer = buffer.iter().map(|x| (*x as f64 / div).into()).collect(); + Float64Type::upcast_column(values) + } + + DecimalDataType::Decimal256(_) => { + 
let (buffer, from_size) = i256::try_downcast_column(&column).unwrap(); + + let div = 10_f64.powi(from_size.scale as i32); + + let values: Buffer = buffer + .iter() + .map(|x| (f64::from(*x) / div).into()) + .collect(); + Float64Type::upcast_column(values) + } + }; + + if is_scalar { + let scalar = result.index(0).unwrap(); + Value::Scalar(scalar.to_owned()) + } else { + Value::Column(result) + } +} diff --git a/src/query/functions/src/scalars/hash.rs b/src/query/functions/src/scalars/hash.rs index 7739ee71f7b85..3eb6fcf42d7a0 100644 --- a/src/query/functions/src/scalars/hash.rs +++ b/src/query/functions/src/scalars/hash.rs @@ -18,6 +18,7 @@ use std::collections::hash_map::DefaultHasher; use std::hash::Hash; use std::hash::Hasher; +use common_expression::types::decimal::DecimalType; use common_expression::types::number::NumberScalar; use common_expression::types::number::F32; use common_expression::types::number::F64; @@ -39,6 +40,7 @@ use common_expression::FunctionDomain; use common_expression::FunctionProperty; use common_expression::FunctionRegistry; use common_expression::Scalar; +use ethnum::i256; use md5::Digest; use md5::Md5 as Md5Hasher; use naive_cityhash::cityhash64_with_seed; @@ -66,6 +68,10 @@ pub fn register(registry: &mut FunctionRegistry) { }); } + // Decimal types we only register the default type size + register_simple_domain_type_hash::>(registry); + register_simple_domain_type_hash::>(registry); + registry.register_passthrough_nullable_1_arg::( "md5", FunctionProperty::default(), @@ -304,13 +310,21 @@ macro_rules! for_all_integer_types{ { u8 }, { u16 }, { u32 }, - { u64 } + { u64 }, + { i128 } } }; } for_all_integer_types! { integer_impl } +impl DFHash for i256 { + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash(self.0.as_slice(), state); + } +} + impl DFHash for F32 { #[inline] fn hash(&self, state: &mut H) { diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 04333a1209c38..1807734d96b44 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -431,6 +431,46 @@ city64withseed(Float64, Float32) :: UInt64 city64withseed(Float64 NULL, Float32 NULL) :: UInt64 NULL city64withseed(Float64, Float64) :: UInt64 city64withseed(Float64 NULL, Float64 NULL) :: UInt64 NULL +city64withseed(Decimal(38, 0), UInt8) :: UInt64 +city64withseed(Decimal(38, 0) NULL, UInt8 NULL) :: UInt64 NULL +city64withseed(Decimal(38, 0), UInt16) :: UInt64 +city64withseed(Decimal(38, 0) NULL, UInt16 NULL) :: UInt64 NULL +city64withseed(Decimal(38, 0), UInt32) :: UInt64 +city64withseed(Decimal(38, 0) NULL, UInt32 NULL) :: UInt64 NULL +city64withseed(Decimal(38, 0), UInt64) :: UInt64 +city64withseed(Decimal(38, 0) NULL, UInt64 NULL) :: UInt64 NULL +city64withseed(Decimal(38, 0), Int8) :: UInt64 +city64withseed(Decimal(38, 0) NULL, Int8 NULL) :: UInt64 NULL +city64withseed(Decimal(38, 0), Int16) :: UInt64 +city64withseed(Decimal(38, 0) NULL, Int16 NULL) :: UInt64 NULL +city64withseed(Decimal(38, 0), Int32) :: UInt64 +city64withseed(Decimal(38, 0) NULL, Int32 NULL) :: UInt64 NULL +city64withseed(Decimal(38, 0), Int64) :: UInt64 +city64withseed(Decimal(38, 0) NULL, Int64 NULL) :: UInt64 NULL +city64withseed(Decimal(38, 0), Float32) :: UInt64 +city64withseed(Decimal(38, 0) NULL, Float32 NULL) :: UInt64 NULL +city64withseed(Decimal(38, 0), Float64) :: UInt64 +city64withseed(Decimal(38, 0) NULL, Float64 NULL) :: UInt64 
NULL +city64withseed(Decimal(76, 0), UInt8) :: UInt64 +city64withseed(Decimal(76, 0) NULL, UInt8 NULL) :: UInt64 NULL +city64withseed(Decimal(76, 0), UInt16) :: UInt64 +city64withseed(Decimal(76, 0) NULL, UInt16 NULL) :: UInt64 NULL +city64withseed(Decimal(76, 0), UInt32) :: UInt64 +city64withseed(Decimal(76, 0) NULL, UInt32 NULL) :: UInt64 NULL +city64withseed(Decimal(76, 0), UInt64) :: UInt64 +city64withseed(Decimal(76, 0) NULL, UInt64 NULL) :: UInt64 NULL +city64withseed(Decimal(76, 0), Int8) :: UInt64 +city64withseed(Decimal(76, 0) NULL, Int8 NULL) :: UInt64 NULL +city64withseed(Decimal(76, 0), Int16) :: UInt64 +city64withseed(Decimal(76, 0) NULL, Int16 NULL) :: UInt64 NULL +city64withseed(Decimal(76, 0), Int32) :: UInt64 +city64withseed(Decimal(76, 0) NULL, Int32 NULL) :: UInt64 NULL +city64withseed(Decimal(76, 0), Int64) :: UInt64 +city64withseed(Decimal(76, 0) NULL, Int64 NULL) :: UInt64 NULL +city64withseed(Decimal(76, 0), Float32) :: UInt64 +city64withseed(Decimal(76, 0) NULL, Float32 NULL) :: UInt64 NULL +city64withseed(Decimal(76, 0), Float64) :: UInt64 +city64withseed(Decimal(76, 0) NULL, Float64 NULL) :: UInt64 NULL contains(Array(UInt8), UInt8) :: Boolean contains(Array(UInt8) NULL, UInt8 NULL) :: Boolean NULL contains(Array(UInt16), UInt16) :: Boolean @@ -2227,6 +2267,10 @@ siphash64(Float32) :: UInt64 siphash64(Float32 NULL) :: UInt64 NULL siphash64(Float64) :: UInt64 siphash64(Float64 NULL) :: UInt64 NULL +siphash64(Decimal(38, 0)) :: UInt64 +siphash64(Decimal(38, 0) NULL) :: UInt64 NULL +siphash64(Decimal(76, 0)) :: UInt64 +siphash64(Decimal(76, 0) NULL) :: UInt64 NULL sleep(Float64) :: UInt8 slice(Array(Nothing), UInt64) :: Array(Nothing) slice(Array(Nothing) NULL, UInt64 NULL) :: Array(Nothing) NULL @@ -3135,6 +3179,10 @@ xxhash32(Float32) :: UInt32 xxhash32(Float32 NULL) :: UInt32 NULL xxhash32(Float64) :: UInt32 xxhash32(Float64 NULL) :: UInt32 NULL +xxhash32(Decimal(38, 0)) :: UInt32 +xxhash32(Decimal(38, 0) NULL) :: UInt32 NULL +xxhash32(Decimal(76, 0)) :: UInt32 +xxhash32(Decimal(76, 0) NULL) :: UInt32 NULL xxhash64(Variant) :: UInt64 xxhash64(Variant NULL) :: UInt64 NULL xxhash64(String) :: UInt64 @@ -3165,6 +3213,10 @@ xxhash64(Float32) :: UInt64 xxhash64(Float32 NULL) :: UInt64 NULL xxhash64(Float64) :: UInt64 xxhash64(Float64 NULL) :: UInt64 NULL +xxhash64(Decimal(38, 0)) :: UInt64 +xxhash64(Decimal(38, 0) NULL) :: UInt64 NULL +xxhash64(Decimal(76, 0)) :: UInt64 +xxhash64(Decimal(76, 0) NULL) :: UInt64 NULL yesterday() :: Date Factory functions: @@ -3200,6 +3252,7 @@ regexp_like regexp_replace regexp_substr to_decimal +to_float64 tuple Function aliases (alias to origin): diff --git a/src/query/pipeline/core/Cargo.toml b/src/query/pipeline/core/Cargo.toml index 0352ffe05456e..f1cd9d14da069 100644 --- a/src/query/pipeline/core/Cargo.toml +++ b/src/query/pipeline/core/Cargo.toml @@ -14,7 +14,6 @@ test = false [dependencies] common-exception = { path = "../../../common/exception" } common-expression = { path = "../../expression" } -common-io = { path = "../../../common/io" } async-trait = "0.1.57" futures = "0.3.24" diff --git a/src/query/pipeline/core/src/processors/port.rs b/src/query/pipeline/core/src/processors/port.rs index 24615d2b45917..8b93da3bf8fd3 100644 --- a/src/query/pipeline/core/src/processors/port.rs +++ b/src/query/pipeline/core/src/processors/port.rs @@ -18,7 +18,6 @@ use std::sync::Arc; use common_exception::Result; use common_expression::DataBlock; -use common_io::prelude::FileSplit; use crate::processors::UpdateTrigger; use 
crate::unsafe_cell_wrap::UnSafeCellWrap; @@ -31,10 +30,7 @@ const FLAGS_MASK: usize = 0b111; const UNSET_FLAGS_MASK: usize = !FLAGS_MASK; #[repr(align(8))] -pub enum SharedData { - Data(Result), - FilePartition(Result), -} +pub struct SharedData(pub Result); pub struct SharedStatus { data: AtomicPtr, @@ -185,31 +181,7 @@ impl InputPort { let unset_flags = HAS_DATA | NEED_DATA; match self.shared.swap(std::ptr::null_mut(), 0, unset_flags) { address if address.is_null() => None, - address => { - if let SharedData::Data(block) = *Box::from_raw(address) { - Some(block) - } else { - unreachable!() - } - } - } - } - } - - #[inline(always)] - pub fn pull_file_partition(&self) -> Option> { - unsafe { - UpdateTrigger::update_input(&self.update_trigger); - let unset_flags = HAS_DATA | NEED_DATA; - match self.shared.swap(std::ptr::null_mut(), 0, unset_flags) { - address if address.is_null() => None, - address => { - if let SharedData::FilePartition(part) = *Box::from_raw(address) { - Some(part) - } else { - unreachable!() - } - } + address => Some((*Box::from_raw(address)).0), } } } @@ -247,17 +219,7 @@ impl OutputPort { unsafe { UpdateTrigger::update_output(&self.update_trigger); - let data = Box::into_raw(Box::new(SharedData::Data(data))); - self.shared.swap(data, HAS_DATA, HAS_DATA); - } - } - - #[inline(always)] - pub fn push_split(&self, data: Result) { - unsafe { - UpdateTrigger::update_output(&self.update_trigger); - - let data = Box::into_raw(Box::new(SharedData::FilePartition(data))); + let data = Box::into_raw(Box::new(SharedData(data))); self.shared.swap(data, HAS_DATA, HAS_DATA); } } diff --git a/src/query/pipeline/core/tests/it/pipelines/processors/port_test.rs b/src/query/pipeline/core/tests/it/pipelines/processors/port_test.rs index cb90faa4d1fe3..f01cff9f3f516 100644 --- a/src/query/pipeline/core/tests/it/pipelines/processors/port_test.rs +++ b/src/query/pipeline/core/tests/it/pipelines/processors/port_test.rs @@ -63,10 +63,6 @@ impl BlockMetaInfo for TestDataMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - fn equals(&self, _: &Box) -> bool { unimplemented!("equals is unimplemented for TestDataMeta") } diff --git a/src/query/pipeline/transforms/src/processors/transforms/transform.rs b/src/query/pipeline/transforms/src/processors/transforms/transform.rs index 781ec9374dc31..5c40857ee7174 100644 --- a/src/query/pipeline/transforms/src/processors/transforms/transform.rs +++ b/src/query/pipeline/transforms/src/processors/transforms/transform.rs @@ -13,9 +13,12 @@ // limitations under the License. use std::any::Any; +use std::marker::PhantomData; use std::sync::Arc; use common_exception::Result; +use common_expression::BlockMetaInfo; +use common_expression::BlockMetaInfoDowncast; use common_expression::DataBlock; use common_pipeline_core::processors::port::InputPort; use common_pipeline_core::processors::port::OutputPort; @@ -154,3 +157,139 @@ impl Transformer { } } } + +// Transform for block meta and ignoring the block columns. 
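+// A BlockMetaTransform consumes only the BlockMetaInfo attached to each incoming
+// DataBlock (the block itself is expected to be empty): the meta is downcast to the
+// concrete type, handed to `transform`, and the resulting DataBlock is pushed
+// downstream. `on_start` and `on_finish` are optional hooks with no-op defaults.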
+pub trait BlockMetaTransform: Send + 'static { + const NAME: &'static str; + + fn transform(&mut self, meta: B) -> Result; + + fn on_start(&mut self) -> Result<()> { + Ok(()) + } + + fn on_finish(&mut self) -> Result<()> { + Ok(()) + } +} + +pub struct BlockMetaTransformer> { + transform: T, + input: Arc, + output: Arc, + + called_on_start: bool, + called_on_finish: bool, + input_data: Option, + output_data: Option, + _phantom_data: PhantomData, +} + +impl> BlockMetaTransformer { + pub fn create(input: Arc, output: Arc, inner: T) -> Box { + Box::new(Self { + input, + output, + transform: inner, + input_data: None, + output_data: None, + called_on_start: false, + called_on_finish: false, + _phantom_data: Default::default(), + }) + } +} + +#[async_trait::async_trait] +impl> Processor for BlockMetaTransformer { + fn name(&self) -> String { + String::from(T::NAME) + } + + fn as_any(&mut self) -> &mut dyn Any { + self + } + + fn event(&mut self) -> Result { + if !self.called_on_start { + return Ok(Event::Sync); + } + + match self.output.is_finished() { + true => self.finish_input(), + false if !self.output.can_push() => self.not_need_data(), + false => match self.output_data.take() { + None if self.input_data.is_some() => Ok(Event::Sync), + None => self.pull_data(), + Some(data) => { + self.output.push_data(Ok(data)); + Ok(Event::NeedConsume) + } + }, + } + } + + fn process(&mut self) -> Result<()> { + if !self.called_on_start { + self.called_on_start = true; + self.transform.on_start()?; + return Ok(()); + } + + if let Some(mut data_block) = self.input_data.take() { + debug_assert!(data_block.is_empty()); + if let Some(block_meta) = data_block.take_meta() { + if let Some(block_meta) = B::downcast_from(block_meta) { + let data_block = self.transform.transform(block_meta)?; + self.output_data = Some(data_block); + } + } + + return Ok(()); + } + + if !self.called_on_finish { + self.called_on_finish = true; + self.transform.on_finish()?; + } + + Ok(()) + } +} + +impl> BlockMetaTransformer { + fn pull_data(&mut self) -> Result { + if self.input.has_data() { + self.input_data = Some(self.input.pull_data().unwrap()?); + return Ok(Event::Sync); + } + + if self.input.is_finished() { + return match !self.called_on_finish { + true => Ok(Event::Sync), + false => { + self.output.finish(); + Ok(Event::Finished) + } + }; + } + + self.input.set_need_data(); + Ok(Event::NeedData) + } + + fn not_need_data(&mut self) -> Result { + self.input.set_not_need_data(); + Ok(Event::NeedConsume) + } + + fn finish_input(&mut self) -> Result { + match !self.called_on_finish { + true => Ok(Event::Sync), + false => { + self.input.finish(); + Ok(Event::Finished) + } + } + } +} diff --git a/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating.rs b/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating.rs index 3ed6e96b78a45..b73b01c83cadd 100644 --- a/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating.rs +++ b/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating.rs @@ -13,9 +13,13 @@ // limitations under the License. 
use std::any::Any; +use std::collections::VecDeque; +use std::marker::PhantomData; use std::sync::Arc; use common_exception::Result; +use common_expression::BlockMetaInfo; +use common_expression::BlockMetaInfoDowncast; use common_expression::DataBlock; use common_pipeline_core::processors::port::InputPort; use common_pipeline_core::processors::port::OutputPort; @@ -25,10 +29,10 @@ use common_pipeline_core::processors::Processor; pub trait AccumulatingTransform: Send { const NAME: &'static str; - fn transform(&mut self, data: DataBlock) -> Result>; + fn transform(&mut self, data: DataBlock) -> Result>; - fn on_finish(&mut self, _output: bool) -> Result> { - Ok(None) + fn on_finish(&mut self, _output: bool) -> Result> { + Ok(vec![]) } } @@ -39,7 +43,7 @@ pub struct AccumulatingTransformer { called_on_finish: bool, input_data: Option, - output_data: Option, + output_data: VecDeque, } impl AccumulatingTransformer { @@ -49,7 +53,7 @@ impl AccumulatingTransformer { input, output, input_data: None, - output_data: None, + output_data: VecDeque::with_capacity(1), called_on_finish: false, }) } @@ -88,7 +92,7 @@ impl Processor for AccumulatingTransformer Processor for AccumulatingTransformer Result<()> { if let Some(data_block) = self.input_data.take() { - self.output_data = self.inner.transform(data_block)?; + self.output_data.extend(self.inner.transform(data_block)?); + return Ok(()); + } + + if !self.called_on_finish { + self.called_on_finish = true; + self.output_data.extend(self.inner.on_finish(true)?); + } + + Ok(()) + } +} + +pub trait BlockMetaAccumulatingTransform: Send + 'static { + const NAME: &'static str; + + fn transform(&mut self, data: B) -> Result>; + + fn on_finish(&mut self, _output: bool) -> Result> { + Ok(None) + } +} + +pub struct BlockMetaAccumulatingTransformer> +{ + inner: T, + input: Arc, + output: Arc, + + called_on_finish: bool, + input_data: Option, + output_data: Option, + _phantom_data: PhantomData, +} + +impl> + BlockMetaAccumulatingTransformer +{ + pub fn create(input: Arc, output: Arc, inner: T) -> Box { + Box::new(Self { + inner, + input, + output, + input_data: None, + output_data: None, + called_on_finish: false, + _phantom_data: Default::default(), + }) + } +} + +impl> Drop + for BlockMetaAccumulatingTransformer +{ + fn drop(&mut self) { + if !self.called_on_finish { + self.inner.on_finish(false).unwrap(); + } + } +} + +#[async_trait::async_trait] +impl> Processor + for BlockMetaAccumulatingTransformer +{ + fn name(&self) -> String { + String::from(T::NAME) + } + + fn as_any(&mut self) -> &mut dyn Any { + self + } + + fn event(&mut self) -> Result { + if self.output.is_finished() { + if !self.called_on_finish { + return Ok(Event::Sync); + } + + self.input.finish(); + return Ok(Event::Finished); + } + + if !self.output.can_push() { + self.input.set_not_need_data(); + return Ok(Event::NeedConsume); + } + + if let Some(data_block) = self.output_data.take() { + self.output.push_data(Ok(data_block)); + return Ok(Event::NeedConsume); + } + + if self.input_data.is_some() { + return Ok(Event::Sync); + } + + if self.input.has_data() { + self.input_data = Some(self.input.pull_data().unwrap()?); + return Ok(Event::Sync); + } + + if self.input.is_finished() { + return match !self.called_on_finish { + true => Ok(Event::Sync), + false => { + self.output.finish(); + Ok(Event::Finished) + } + }; + } + + self.input.set_need_data(); + Ok(Event::NeedData) + } + + fn process(&mut self) -> Result<()> { + if let Some(mut data_block) = self.input_data.take() { + 
debug_assert!(data_block.is_empty()); + + if let Some(block_meta) = data_block.take_meta() { + if let Some(block_meta) = B::downcast_from(block_meta) { + self.output_data = self.inner.transform(block_meta)?; + } + } + return Ok(()); } diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index ba254ae8d7022..bf8810d1bec82 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -99,6 +99,7 @@ bytes = "1.2.1" chrono = { workspace = true } chrono-tz = { workspace = true } dashmap = "5.4" +ethnum = { version = "1.3" } futures = "0.3.24" futures-util = "0.3.24" h2 = "0.3.15" @@ -116,7 +117,6 @@ parking_lot = "0.12.1" petgraph = "0.6.2" pin-project-lite = "0.2.9" poem = { version = "1", features = ["rustls", "multipart", "compression"] } -primitive-types = "0.12.0" rand = "0.8.5" regex = "1.6.0" scopeguard = "1.1.0" diff --git a/src/query/service/src/api/rpc/exchange/exchange_sink.rs b/src/query/service/src/api/rpc/exchange/exchange_sink.rs index f807ea741865c..dfef42f4d996a 100644 --- a/src/query/service/src/api/rpc/exchange/exchange_sink.rs +++ b/src/query/service/src/api/rpc/exchange/exchange_sink.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; +use common_expression::BlockMetaInfoDowncast; use common_expression::DataBlock; use common_pipeline_core::pipe::Pipe; use common_pipeline_core::pipe::PipeItem; @@ -128,7 +129,7 @@ impl ExchangeSorting for SinkExchangeSorting { fn block_number(&self, data_block: &DataBlock) -> Result { let block_meta = data_block.get_meta(); let shuffle_meta = block_meta - .and_then(|meta| meta.as_any().downcast_ref::()) + .and_then(ExchangeSerializeMeta::downcast_ref_from) .unwrap(); Ok(shuffle_meta.block_number) diff --git a/src/query/service/src/api/rpc/exchange/exchange_sink_writer.rs b/src/query/service/src/api/rpc/exchange/exchange_sink_writer.rs index 3ca4f5d0439b6..a4d78dbce399f 100644 --- a/src/query/service/src/api/rpc/exchange/exchange_sink_writer.rs +++ b/src/query/service/src/api/rpc/exchange/exchange_sink_writer.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; +use common_expression::BlockMetaInfoDowncast; use common_expression::DataBlock; use common_pipeline_core::pipe::PipeItem; use common_pipeline_core::processors::port::InputPort; @@ -59,14 +60,11 @@ impl AsyncSink for ExchangeWriterSink { None => Err(ErrorCode::Internal( "ExchangeWriterSink only recv ExchangeSerializeMeta.", )), - Some(mut block_meta) => match block_meta - .as_mut_any() - .downcast_mut::() - { + Some(block_meta) => match ExchangeSerializeMeta::downcast_from(block_meta) { None => Err(ErrorCode::Internal( "ExchangeWriterSink only recv ExchangeSerializeMeta.", )), - Some(block_meta) => Ok(block_meta.packet.take().unwrap()), + Some(block_meta) => Ok(block_meta.packet.unwrap()), }, }?; diff --git a/src/query/service/src/api/rpc/exchange/exchange_transform_shuffle.rs b/src/query/service/src/api/rpc/exchange/exchange_transform_shuffle.rs index 87a852858a005..3c377aaf0abe5 100644 --- a/src/query/service/src/api/rpc/exchange/exchange_transform_shuffle.rs +++ b/src/query/service/src/api/rpc/exchange/exchange_transform_shuffle.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; use common_expression::BlockMetaInfo; +use common_expression::BlockMetaInfoDowncast; use common_expression::BlockMetaInfoPtr; use common_expression::DataBlock; use common_pipeline_core::pipe::Pipe; @@ -73,10 +74,6 @@ 
impl BlockMetaInfo for ExchangeShuffleMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - fn equals(&self, _: &Box) -> bool { unimplemented!("Unimplemented equals ExchangeShuffleMeta") } @@ -195,29 +192,23 @@ impl Processor for ExchangeShuffleTransform { } if let Some(mut data_block) = self.try_pull_inputs()? { - let mut block_meta = data_block.take_meta(); - let shuffle_meta = block_meta - .as_mut() - .and_then(|meta| meta.as_mut_any().downcast_mut::()); - - match shuffle_meta { - None => { - return Err(ErrorCode::Internal( - "ExchangeShuffleTransform only recv ExchangeShuffleMeta.", - )); - } - Some(shuffle_meta) => { - let blocks = std::mem::take(&mut shuffle_meta.blocks); + if let Some(block_meta) = data_block.take_meta() { + if let Some(shuffle_meta) = ExchangeShuffleMeta::downcast_from(block_meta) { + let blocks = shuffle_meta.blocks; for (index, block) in blocks.into_iter().enumerate() { if !block.is_empty() { self.buffer.push_back(index, block); } } + + // Try push again. + continue; } } - // Try push again. - continue; + return Err(ErrorCode::Internal( + "ExchangeShuffleTransform only recv ExchangeShuffleMeta.", + )); } if self.all_inputs_finished && self.buffer.is_empty() { @@ -336,7 +327,7 @@ impl ExchangeSorting for ShuffleExchangeSorting { fn block_number(&self, data_block: &DataBlock) -> Result { let block_meta = data_block.get_meta(); let shuffle_meta = block_meta - .and_then(|meta| meta.as_any().downcast_ref::()) + .and_then(ExchangeShuffleMeta::downcast_ref_from) .unwrap(); for block in &shuffle_meta.blocks { diff --git a/src/query/service/src/api/rpc/exchange/serde/exchange_deserializer.rs b/src/query/service/src/api/rpc/exchange/serde/exchange_deserializer.rs index 455c01b505a09..2be106cb6a593 100644 --- a/src/query/service/src/api/rpc/exchange/serde/exchange_deserializer.rs +++ b/src/query/service/src/api/rpc/exchange/serde/exchange_deserializer.rs @@ -24,6 +24,7 @@ use common_arrow::arrow::io::ipc::IpcSchema; use common_exception::ErrorCode; use common_exception::Result; use common_expression::BlockMetaInfo; +use common_expression::BlockMetaInfoDowncast; use common_expression::BlockMetaInfoPtr; use common_expression::DataBlock; use common_expression::DataSchemaRef; @@ -103,12 +104,9 @@ impl Transform for TransformExchangeDeserializer { const NAME: &'static str = "TransformExchangeDeserializer"; fn transform(&mut self, mut data: DataBlock) -> Result { - if let Some(mut block_meta) = data.take_meta() { - if let Some(exchange_meta) = block_meta - .as_mut_any() - .downcast_mut::() - { - return match exchange_meta.packet.take().unwrap() { + if let Some(block_meta) = data.take_meta() { + if let Some(exchange_meta) = ExchangeDeserializeMeta::downcast_from(block_meta) { + return match exchange_meta.packet.unwrap() { DataPacket::ErrorCode(v) => Err(v), DataPacket::ClosingInput => unreachable!(), DataPacket::ClosingOutput => unreachable!(), @@ -163,10 +161,6 @@ impl BlockMetaInfo for ExchangeDeserializeMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - fn equals(&self, _: &Box) -> bool { unimplemented!("Unimplemented equals ExchangeSourceMeta") } diff --git a/src/query/service/src/api/rpc/exchange/serde/exchange_serializer.rs b/src/query/service/src/api/rpc/exchange/serde/exchange_serializer.rs index 756e00c3752f0..43d66b3deeab8 100644 --- a/src/query/service/src/api/rpc/exchange/serde/exchange_serializer.rs +++ b/src/query/service/src/api/rpc/exchange/serde/exchange_serializer.rs @@ -80,10 +80,6 @@ impl BlockMetaInfo for 
ExchangeSerializeMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - fn equals(&self, _: &Box) -> bool { unimplemented!("Unimplemented equals ExchangeSerializeMeta") } diff --git a/src/query/service/src/interpreters/interpreter_select_v2.rs b/src/query/service/src/interpreters/interpreter_select_v2.rs index fba3e950f8f2b..4412ca23ce671 100644 --- a/src/query/service/src/interpreters/interpreter_select_v2.rs +++ b/src/query/service/src/interpreters/interpreter_select_v2.rs @@ -151,6 +151,17 @@ impl SelectInterpreterV2 { Ok(()) } + + pub fn include_system_tables(&self) -> bool { + let r_lock = self.metadata.read(); + let tables = r_lock.tables(); + for t in tables { + if t.database().eq_ignore_ascii_case("system") { + return true; + } + } + false + } } #[async_trait::async_trait] @@ -169,7 +180,8 @@ impl Interpreter for SelectInterpreterV2 { async fn execute2(&self) -> Result { // 0. Need to build pipeline first to get the partitions. let mut build_res = self.build_pipeline().await?; - if self.ctx.get_settings().get_enable_query_result_cache()? { + if self.ctx.get_settings().get_enable_query_result_cache()? && !self.include_system_tables() + { let key = gen_result_cache_key(self.formatted_ast.as_ref().unwrap()); // 1. Try to get result from cache. let kv_store = UserApiProvider::instance().get_meta_store_client(); diff --git a/src/query/service/src/lib.rs b/src/query/service/src/lib.rs index 9dbcd6d2e2a66..336956916d029 100644 --- a/src/query/service/src/lib.rs +++ b/src/query/service/src/lib.rs @@ -27,6 +27,7 @@ #![feature(iterator_try_reduce)] #![feature(cursor_remaining)] #![feature(vec_into_raw_parts)] +#![feature(associated_type_bounds)] extern crate core; diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 7527dd618b455..2629bff864d7a 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -19,10 +19,12 @@ use common_catalog::table::AppendMode; use common_exception::ErrorCode; use common_exception::Result; use common_expression::type_check::check_function; +use common_expression::with_mappedhash_method; use common_expression::DataBlock; use common_expression::DataField; use common_expression::DataSchemaRef; use common_expression::FunctionContext; +use common_expression::HashMethodKind; use common_expression::SortColumnDescription; use common_functions::aggregates::AggregateFunctionFactory; use common_functions::aggregates::AggregateFunctionRef; @@ -61,20 +63,23 @@ use crate::pipelines::processors::transforms::efficiently_memory_final_aggregato use crate::pipelines::processors::transforms::AggregateExchangeSorting; use crate::pipelines::processors::transforms::FinalSingleStateAggregator; use crate::pipelines::processors::transforms::HashJoinDesc; +use crate::pipelines::processors::transforms::PartialSingleStateAggregator; use crate::pipelines::processors::transforms::RightSemiAntiJoinCompactor; +use crate::pipelines::processors::transforms::TransformAggregateSerializer; +use crate::pipelines::processors::transforms::TransformGroupBySerializer; use crate::pipelines::processors::transforms::TransformLeftJoin; use crate::pipelines::processors::transforms::TransformMarkJoin; use crate::pipelines::processors::transforms::TransformMergeBlock; +use crate::pipelines::processors::transforms::TransformPartialAggregate; +use crate::pipelines::processors::transforms::TransformPartialGroupBy; use 
crate::pipelines::processors::transforms::TransformRightJoin; use crate::pipelines::processors::transforms::TransformRightSemiAntiJoin; use crate::pipelines::processors::AggregatorParams; -use crate::pipelines::processors::AggregatorTransformParams; use crate::pipelines::processors::JoinHashTable; use crate::pipelines::processors::LeftJoinCompactor; use crate::pipelines::processors::MarkJoinCompactor; use crate::pipelines::processors::RightJoinCompactor; use crate::pipelines::processors::SinkBuildHashTable; -use crate::pipelines::processors::TransformAggregator; use crate::pipelines::processors::TransformCastSchema; use crate::pipelines::processors::TransformHashJoinProbe; use crate::pipelines::processors::TransformLimit; @@ -364,22 +369,56 @@ impl PipelineBuilder { fn build_aggregate_partial(&mut self, aggregate: &AggregatePartial) -> Result<()> { self.build_pipeline(&aggregate.input)?; + let params = Self::build_aggregator_params( aggregate.input.output_schema()?, - // aggregate.output_schema()?, &aggregate.group_by, &aggregate.agg_funcs, None, )?; - let pass_state_to_final = self.enable_memory_efficient_aggregator(¶ms); + if params.group_columns.is_empty() { + return self.main_pipeline.add_transform(|input, output| { + let transform = PartialSingleStateAggregator::try_create(input, output, ¶ms)?; + + if self.enable_profiling { + Ok(ProcessorPtr::create(ProfileWrapper::create( + transform, + aggregate.plan_id, + self.prof_span_set.clone(), + ))) + } else { + Ok(ProcessorPtr::create(transform)) + } + }); + } + + let group_cols = ¶ms.group_columns; + let schema_before_group_by = params.input_schema.clone(); + let sample_block = DataBlock::empty_with_schema(schema_before_group_by); + let method = DataBlock::choose_hash_method(&sample_block, group_cols)?; self.main_pipeline.add_transform(|input, output| { - let transform = TransformAggregator::try_create_partial( - AggregatorTransformParams::try_create(input, output, ¶ms)?, - self.ctx.clone(), - pass_state_to_final, - )?; + let transform = match params.aggregate_functions.is_empty() { + true => with_mappedhash_method!(|T| match method.clone() { + HashMethodKind::T(method) => TransformPartialGroupBy::try_create( + self.ctx.clone(), + method, + input, + output, + params.clone() + ), + }), + false => with_mappedhash_method!(|T| match method.clone() { + HashMethodKind::T(method) => TransformPartialAggregate::try_create( + self.ctx.clone(), + method, + input, + output, + params.clone() + ), + }), + }?; if self.enable_profiling { Ok(ProcessorPtr::create(ProfileWrapper::create( @@ -392,17 +431,31 @@ impl PipelineBuilder { } })?; + if !self.ctx.get_cluster().is_empty() { + // TODO: can serialize only when needed. 
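+ // The serializer stage below is appended only when the query runs on a cluster:
+ // partial aggregate state must be sent through the exchange, and the variant
+ // (group-by serializer vs. aggregate serializer) is chosen to match the same
+ // hash method selected above.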
+ self.main_pipeline.add_transform(|input, output| { + match params.aggregate_functions.is_empty() { + true => with_mappedhash_method!(|T| match method.clone() { + HashMethodKind::T(method) => + TransformGroupBySerializer::try_create(input, output, method,), + }), + false => with_mappedhash_method!(|T| match method.clone() { + HashMethodKind::T(method) => TransformAggregateSerializer::try_create( + input, + output, + method, + params.clone(), + ), + }), + } + })?; + } + self.exchange_sorting = Some(AggregateExchangeSorting::create()); Ok(()) } - fn enable_memory_efficient_aggregator(&self, params: &Arc) -> bool { - self.ctx.get_cluster().is_empty() - && !params.group_columns.is_empty() - && self.main_pipeline.output_len() > 1 - } - fn build_aggregate_final(&mut self, aggregate: &AggregateFinal) -> Result<()> { self.build_pipeline(&aggregate.input)?; @@ -431,7 +484,7 @@ impl PipelineBuilder { }); } - efficiently_memory_final_aggregator(params, &mut self.main_pipeline) + efficiently_memory_final_aggregator(&self.ctx, params, &mut self.main_pipeline) } pub fn build_aggregator_params( diff --git a/src/query/service/src/pipelines/processors/mod.rs b/src/query/service/src/pipelines/processors/mod.rs index 534b54cb41fc9..f76eed678c012 100644 --- a/src/query/service/src/pipelines/processors/mod.rs +++ b/src/query/service/src/pipelines/processors/mod.rs @@ -18,7 +18,6 @@ pub(crate) mod transforms; pub use transforms::create_dummy_item; pub use transforms::create_dummy_items; pub use transforms::AggregatorParams; -pub use transforms::AggregatorTransformParams; pub use transforms::BlockCompactor; pub use transforms::HashJoinDesc; pub use transforms::HashJoinState; @@ -31,7 +30,6 @@ pub use transforms::RightJoinCompactor; pub use transforms::SerializerHashTable; pub use transforms::SinkBuildHashTable; pub use transforms::SortMergeCompactor; -pub use transforms::TransformAggregator; pub use transforms::TransformBlockCompact; pub use transforms::TransformCastSchema; pub use transforms::TransformCompact; diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_cell.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_cell.rs new file mode 100644 index 0000000000000..aae332578678a --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_cell.rs @@ -0,0 +1,240 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
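+
+// Ownership helpers for aggregation hash tables: a HashTableCell bundles a hash
+// table with the Area that backs its aggregate states, and a HashTableDropper
+// releases those states when the cell is dropped.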
+ +use std::any::Any; +use std::marker::PhantomData; +use std::sync::Arc; + +use common_functions::aggregates::StateAddr; +use common_hashtable::HashtableEntryRefLike; +use common_hashtable::HashtableLike; + +use crate::pipelines::processors::transforms::group_by::Area; +use crate::pipelines::processors::transforms::group_by::ArenaHolder; +use crate::pipelines::processors::transforms::group_by::HashMethodBounds; +use crate::pipelines::processors::transforms::group_by::PartitionedHashMethod; +use crate::pipelines::processors::transforms::group_by::PolymorphicKeysHelper; +use crate::pipelines::processors::AggregatorParams; + +// Manage unsafe memory usage, free memory when the cell is destroyed. +pub struct HashTableCell { + pub hashtable: T::HashTable, + pub arena: Area, + pub arena_holders: Vec, + pub temp_values: Vec< as HashtableLike>::Value>, + pub _dropper: Option>>, +} + +unsafe impl Send for HashTableCell {} + +unsafe impl Sync for HashTableCell {} + +impl Drop for HashTableCell { + fn drop(&mut self) { + if let Some(dropper) = self._dropper.take() { + dropper.destroy(&mut self.hashtable); + + for value in &self.temp_values { + dropper.destroy_value(value) + } + } + } +} + +impl HashTableCell { + pub fn create( + inner: T::HashTable, + _dropper: Arc>, + ) -> HashTableCell { + HashTableCell:: { + hashtable: inner, + arena_holders: vec![], + temp_values: vec![], + _dropper: Some(_dropper), + arena: Area::create(), + } + } +} + +pub trait HashTableDropper { + fn as_any(&self) -> &dyn Any; + fn destroy(&self, hashtable: &mut T::HashTable); + fn destroy_value(&self, value: & as HashtableLike>::Value); +} + +pub struct GroupByHashTableDropper { + _phantom: PhantomData, +} + +impl GroupByHashTableDropper { + pub fn create() -> Arc> { + Arc::new(GroupByHashTableDropper:: { + _phantom: Default::default(), + }) + } +} + +impl HashTableDropper for GroupByHashTableDropper { + fn as_any(&self) -> &dyn Any { + self + } + + fn destroy(&self, _: &mut T::HashTable<()>) { + // do nothing + } + + fn destroy_value(&self, _: &()) { + // do nothing + } +} + +pub struct AggregateHashTableDropper { + params: Arc, + _phantom: PhantomData, +} + +impl AggregateHashTableDropper { + pub fn create(params: Arc) -> Arc> { + Arc::new(AggregateHashTableDropper:: { + params, + _phantom: Default::default(), + }) + } +} + +impl HashTableDropper for AggregateHashTableDropper { + fn as_any(&self) -> &dyn Any { + self + } + + fn destroy(&self, hashtable: &mut T::HashTable) { + let aggregator_params = self.params.as_ref(); + let aggregate_functions = &aggregator_params.aggregate_functions; + let offsets_aggregate_states = &aggregator_params.offsets_aggregate_states; + + let functions = aggregate_functions + .iter() + .filter(|p| p.need_manual_drop_state()) + .collect::>(); + + let state_offsets = offsets_aggregate_states + .iter() + .enumerate() + .filter(|(idx, _)| aggregate_functions[*idx].need_manual_drop_state()) + .map(|(_, s)| *s) + .collect::>(); + + if !state_offsets.is_empty() { + for group_entity in hashtable.iter() { + let place = Into::::into(*group_entity.get()); + + for (function, state_offset) in functions.iter().zip(state_offsets.iter()) { + unsafe { function.drop_state(place.next(*state_offset)) } + } + } + } + } + + fn destroy_value(&self, value: &usize) { + let aggregator_params = self.params.as_ref(); + let aggregate_functions = &aggregator_params.aggregate_functions; + let offsets_aggregate_states = &aggregator_params.offsets_aggregate_states; + + let functions = aggregate_functions + .iter() + 
.filter(|p| p.need_manual_drop_state()) + .collect::>(); + + let state_offsets = offsets_aggregate_states + .iter() + .enumerate() + .filter(|(idx, _)| aggregate_functions[*idx].need_manual_drop_state()) + .map(|(_, s)| *s) + .collect::>(); + + let temp_place = StateAddr::new(*value); + for (state_offset, function) in state_offsets.iter().zip(functions.iter()) { + let place = temp_place.next(*state_offset); + unsafe { function.drop_state(place) } + } + } +} + +pub struct PartitionedHashTableDropper { + _inner_dropper: Arc>, +} + +impl PartitionedHashTableDropper { + pub fn create( + _inner_dropper: Arc>, + ) -> Arc, V>> { + Arc::new(Self { _inner_dropper }) + } + + pub fn split_cell( + mut v: HashTableCell, V>, + ) -> Vec> { + unsafe { + let arena = std::mem::replace(&mut v.arena, Area::create()); + v.arena_holders.push(ArenaHolder::create(Some(arena))); + + let dropper = v + ._dropper + .as_ref() + .unwrap() + .as_any() + .downcast_ref::>() + .unwrap() + ._inner_dropper + .clone(); + + let mut cells = Vec::with_capacity(256); + while let Some(table) = v.hashtable.pop_first_inner_table() { + let mut table_cell = HashTableCell::create(table, dropper.clone()); + table_cell.arena_holders = v.arena_holders.to_vec(); + cells.push(table_cell); + } + + cells + } + } +} + +impl + HashTableDropper, V> for PartitionedHashTableDropper +{ + fn as_any(&self) -> &dyn Any { + self + } + + fn destroy( + &self, + hashtable: &mut as PolymorphicKeysHelper< + PartitionedHashMethod, + >>::HashTable, + ) { + for inner_table in hashtable.iter_tables_mut() { + self._inner_dropper.destroy(inner_table) + } + } + + fn destroy_value( + &self, + value: &< as PolymorphicKeysHelper< + PartitionedHashMethod, + >>::HashTable as HashtableLike>::Value, + ) { + self._inner_dropper.destroy_value(value) + } +} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_sorting.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_sorting.rs index 0d2e4abd70b98..84e44cdfc2444 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_sorting.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_sorting.rs @@ -16,10 +16,11 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; +use common_expression::BlockMetaInfoDowncast; use common_expression::DataBlock; use crate::api::ExchangeSorting; -use crate::pipelines::processors::transforms::aggregator::AggregateInfo; +use crate::pipelines::processors::transforms::aggregator::serde::AggregateSerdeMeta; pub struct AggregateExchangeSorting {} @@ -33,10 +34,9 @@ impl ExchangeSorting for AggregateExchangeSorting { fn block_number(&self, data_block: &DataBlock) -> Result { match data_block.get_meta() { None => Ok(-1), - Some(block_meta_info) => match block_meta_info.as_any().downcast_ref::() - { + Some(block_meta_info) => match AggregateSerdeMeta::downcast_ref_from(block_meta_info) { None => Err(ErrorCode::Internal( - "Internal error, AggregateExchangeSorting only recv AggregateInfo", + "Internal error, AggregateExchangeSorting only recv AggregateSerdeMeta", )), Some(meta_info) => Ok(meta_info.bucket), }, diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_hashstate_info.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_hashstate_info.rs deleted file mode 100644 index fe438a1428ce3..0000000000000 --- 
a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_hashstate_info.rs +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2023 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::any::Any; - -use common_expression::BlockMetaInfo; -use common_expression::BlockMetaInfoPtr; -use serde::Deserialize; -use serde::Deserializer; -use serde::Serialize; -use serde::Serializer; - -use crate::pipelines::processors::transforms::group_by::ArenaHolder; - -#[derive(Debug)] -pub struct AggregateHashStateInfo { - pub bucket: usize, - // a subhashtable state - pub hash_state: Box, - pub state_holder: Option, -} - -impl AggregateHashStateInfo { - pub fn create( - bucket: usize, - hash_state: Box, - state_holder: Option, - ) -> BlockMetaInfoPtr { - Box::new(AggregateHashStateInfo { - bucket, - hash_state, - state_holder, - }) - } -} - -impl Serialize for AggregateHashStateInfo { - fn serialize(&self, _: S) -> Result - where S: Serializer { - unreachable!("AggregateHashStateInfo does not support exchanging between multiple nodes") - } -} - -impl<'de> Deserialize<'de> for AggregateHashStateInfo { - fn deserialize(_: D) -> Result - where D: Deserializer<'de> { - unreachable!("AggregateHashStateInfo does not support exchanging between multiple nodes") - } -} - -#[typetag::serde(name = "aggregate_hash_state_info")] -impl BlockMetaInfo for AggregateHashStateInfo { - fn as_any(&self) -> &dyn Any { - self - } - - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - - fn clone_self(&self) -> Box { - unimplemented!("Unimplemented clone for AggregateHashStateInfo") - } - - fn equals(&self, _: &Box) -> bool { - unimplemented!("Unimplemented equals for AggregateHashStateInfo") - } -} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_meta.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_meta.rs new file mode 100644 index 0000000000000..1075b9525284b --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_meta.rs @@ -0,0 +1,128 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
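+
+// Block meta passed between the aggregation processors: a bucket's in-memory hash
+// table (HashTablePayload), a serialized bucket (SerializedPayload: bucket id plus
+// DataBlock), or a Partitioned group of payloads under one bucket.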
+ +use std::any::Any; +use std::fmt::Debug; +use std::fmt::Formatter; + +use common_expression::BlockMetaInfo; +use common_expression::BlockMetaInfoPtr; +use common_expression::Column; +use common_expression::DataBlock; + +use crate::pipelines::processors::transforms::group_by::ArenaHolder; +use crate::pipelines::processors::transforms::group_by::HashMethodBounds; +use crate::pipelines::processors::transforms::HashTableCell; + +pub struct HashTablePayload { + pub bucket: isize, + pub cell: HashTableCell, + pub arena_holder: ArenaHolder, +} + +pub struct SerializedPayload { + pub bucket: isize, + pub data_block: DataBlock, +} + +impl SerializedPayload { + pub fn get_group_by_column(&self) -> &Column { + let entry = self.data_block.columns().last().unwrap(); + entry.value.as_column().unwrap() + } +} + +pub enum AggregateMeta { + Serialized(SerializedPayload), + HashTable(HashTablePayload), + + Partitioned { bucket: isize, data: Vec }, +} + +impl AggregateMeta { + pub fn create_hashtable(bucket: isize, cell: HashTableCell) -> BlockMetaInfoPtr { + Box::new(AggregateMeta::::HashTable(HashTablePayload { + cell, + bucket, + arena_holder: ArenaHolder::create(None), + })) + } + + pub fn create_serialized(bucket: isize, block: DataBlock) -> BlockMetaInfoPtr { + Box::new(AggregateMeta::::Serialized(SerializedPayload { + bucket, + data_block: block, + })) + } + + pub fn create_partitioned(bucket: isize, data: Vec) -> BlockMetaInfoPtr { + Box::new(AggregateMeta::::Partitioned { data, bucket }) + } +} + +impl serde::Serialize + for AggregateMeta +{ + fn serialize(&self, _: S) -> Result + where S: serde::Serializer { + unreachable!("AggregateMeta does not support exchanging between multiple nodes") + } +} + +impl<'de, Method: HashMethodBounds, V: Send + Sync + 'static> serde::Deserialize<'de> + for AggregateMeta +{ + fn deserialize(_: D) -> Result + where D: serde::Deserializer<'de> { + unreachable!("AggregateMeta does not support exchanging between multiple nodes") + } +} + +impl Debug for AggregateMeta { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + AggregateMeta::HashTable(_) => f.debug_struct("AggregateMeta::HashTable").finish(), + AggregateMeta::Partitioned { .. } => { + f.debug_struct("AggregateMeta::Partitioned").finish() + } + AggregateMeta::Serialized { .. } => { + f.debug_struct("AggregateMeta::Serialized").finish() + } + } + } +} + +impl BlockMetaInfo + for AggregateMeta +{ + fn as_any(&self) -> &dyn Any { + self + } + + fn typetag_deserialize(&self) { + unimplemented!("AggregateMeta does not support exchanging between multiple nodes") + } + + fn typetag_name(&self) -> &'static str { + unimplemented!("AggregateMeta does not support exchanging between multiple nodes") + } + + fn equals(&self, _: &Box) -> bool { + unimplemented!("Unimplemented equals for AggregateMeta") + } + + fn clone_self(&self) -> Box { + unimplemented!("Unimplemented clone for AggregateMeta") + } +} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_final_parallel.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_final_parallel.rs deleted file mode 100644 index f1ee823c121e8..0000000000000 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_final_parallel.rs +++ /dev/null @@ -1,343 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::borrow::BorrowMut; -use std::sync::Arc; -use std::vec; - -use common_exception::ErrorCode; -use common_exception::Result; -use common_expression::ColumnBuilder; -use common_expression::DataBlock; -use common_functions::aggregates::StateAddr; -use common_hashtable::HashtableEntryMutRefLike; -use common_hashtable::HashtableEntryRefLike; -use common_hashtable::HashtableLike; - -use super::estimated_key_size; -use super::AggregateHashStateInfo; -use crate::pipelines::processors::transforms::group_by::Area; -use crate::pipelines::processors::transforms::group_by::ArenaHolder; -use crate::pipelines::processors::transforms::group_by::GroupColumnsBuilder; -use crate::pipelines::processors::transforms::group_by::HashMethodBounds; -use crate::pipelines::processors::transforms::group_by::KeysColumnIter; -use crate::pipelines::processors::AggregatorParams; - -pub struct BucketAggregator { - area: Area, - method: Method, - params: Arc, - hash_table: Method::HashTable, - state_holders: Vec>, - - pub(crate) reach_limit: bool, - // used for deserialization only if has agg, so we can reuse it during the loop - temp_place: StateAddr, -} - -impl BucketAggregator { - pub fn create(method: Method, params: Arc) -> Result { - let mut area = Area::create(); - let hash_table = method.create_hash_table()?; - let temp_place = match params.aggregate_functions.is_empty() { - true => StateAddr::new(0), - false => params.alloc_layout(&mut area), - }; - - Ok(Self { - area, - method, - params, - hash_table, - reach_limit: false, - state_holders: Vec::with_capacity(16), - temp_place, - }) - } - - fn merge_partial_hashstates(&mut self, hashtable: &mut Method::HashTable) -> Result<()> { - // Note: We can't swap the ptr here, there maybe some bugs if the original hashtable - // if self.hash_table.len() == 0 { - // std::mem::swap(&mut self.hash_table, hashtable); - // return Ok(()); - // } - - if !HAS_AGG { - unsafe { - for key in hashtable.iter() { - let _ = self.hash_table.insert_and_entry(key.key()); - } - if let Some(limit) = self.params.limit { - if self.hash_table.len() >= limit { - return Ok(()); - } - } - } - } else { - let aggregate_functions = &self.params.aggregate_functions; - let offsets_aggregate_states = &self.params.offsets_aggregate_states; - - for entry in hashtable.iter() { - let key = entry.key(); - unsafe { - match self.hash_table.insert(key) { - Ok(e) => { - // just set new places and the arena will be keeped in partial state - e.write(*entry.get()); - } - Err(place) => { - // place already exists - // that means we should merge the aggregation - let place = StateAddr::new(*place); - let old_place = StateAddr::new(*entry.get()); - - for (idx, aggregate_function) in aggregate_functions.iter().enumerate() - { - let final_place = place.next(offsets_aggregate_states[idx]); - let state_place = old_place.next(offsets_aggregate_states[idx]); - aggregate_function.merge(final_place, state_place)?; - aggregate_function.drop_state(state_place); - } - } - } - } - } - } - hashtable.clear(); - Ok(()) - } - - pub fn merge_blocks(&mut self, blocks: Vec) -> Result> { - if blocks.is_empty() { - 
return Ok(vec![]); - } - - for mut data_block in blocks { - if let Some(mut meta) = data_block.take_meta() { - if let Some(info) = meta.as_mut_any().downcast_mut::() { - let hashtable = info - .hash_state - .downcast_mut::>() - .unwrap(); - self.state_holders.push(info.state_holder.take()); - self.merge_partial_hashstates(hashtable)?; - continue; - } - } - - let block = data_block.convert_to_full(); - // 1.1 and 1.2. - let aggregate_function_len = self.params.aggregate_functions.len(); - let keys_column = block - .get_by_offset(aggregate_function_len) - .value - .as_column() - .unwrap(); - let keys_iter = self.method.keys_iter_from_column(keys_column)?; - - if !HAS_AGG { - unsafe { - for key in keys_iter.iter() { - let _ = self.hash_table.insert_and_entry(key); - } - - if let Some(limit) = self.params.limit { - if self.hash_table.len() >= limit { - break; - } - } - } - } else { - // first state places of current block - let places = self.lookup_state(&keys_iter); - - let states_columns = (0..aggregate_function_len) - .map(|i| block.get_by_offset(i)) - .collect::>(); - let mut states_binary_columns = Vec::with_capacity(states_columns.len()); - - for agg in states_columns.iter().take(aggregate_function_len) { - let aggr_column = - agg.value.as_column().unwrap().as_string().ok_or_else(|| { - ErrorCode::IllegalDataType(format!( - "Aggregation column should be StringType, but got {:?}", - agg.value - )) - })?; - states_binary_columns.push(aggr_column); - } - - let aggregate_functions = &self.params.aggregate_functions; - let offsets_aggregate_states = &self.params.offsets_aggregate_states; - - for (row, place) in places.iter() { - for (idx, aggregate_function) in aggregate_functions.iter().enumerate() { - let final_place = place.next(offsets_aggregate_states[idx]); - let state_place = self.temp_place.next(offsets_aggregate_states[idx]); - - let mut data = unsafe { states_binary_columns[idx].index_unchecked(*row) }; - aggregate_function.deserialize(state_place, &mut data)?; - aggregate_function.merge(final_place, state_place)?; - } - } - } - } - - let value_size = estimated_key_size(&self.hash_table); - let keys_len = self.hash_table.len(); - - let mut group_columns_builder = - self.method - .group_columns_builder(keys_len, value_size, &self.params); - - if !HAS_AGG { - for group_entity in self.hash_table.iter() { - group_columns_builder.append_value(group_entity.key()); - } - - let columns = group_columns_builder.finish()?; - - Ok(vec![DataBlock::new_from_columns(columns)]) - } else { - let aggregate_functions = &self.params.aggregate_functions; - let offsets_aggregate_states = &self.params.offsets_aggregate_states; - - let mut aggregates_column_builder = { - let mut values = vec![]; - for aggregate_function in aggregate_functions { - let data_type = aggregate_function.return_type()?; - let builder = ColumnBuilder::with_capacity(&data_type, self.hash_table.len()); - values.push(builder) - } - values - }; - - let mut places = Vec::with_capacity(keys_len); - for group_entity in self.hash_table.iter() { - places.push(StateAddr::new(*group_entity.get())); - group_columns_builder.append_value(group_entity.key()); - } - - for (idx, aggregate_function) in aggregate_functions.iter().enumerate() { - let builder = aggregates_column_builder[idx].borrow_mut(); - - if idx > 0 { - for place in places.iter_mut() { - *place = place.next( - offsets_aggregate_states[idx] - offsets_aggregate_states[idx - 1], - ); - } - } - aggregate_function.batch_merge_result(&places, builder)?; - } - - // Build final state 
block. - let mut columns = aggregates_column_builder - .into_iter() - .map(|builder| builder.build()) - .collect::>(); - - let group_columns = group_columns_builder.finish()?; - columns.extend_from_slice(&group_columns); - - Ok(vec![DataBlock::new_from_columns(columns)]) - } - } - - /// Allocate aggregation function state for each key(the same key can always get the same state) - #[inline(always)] - fn lookup_state(&mut self, keys_iter: &Method::KeysColumnIter) -> Vec<(usize, StateAddr)> { - let iter = keys_iter.iter(); - let (len, _) = iter.size_hint(); - let mut places = Vec::with_capacity(len); - - let mut current_len = self.hash_table.len(); - unsafe { - for (row, key) in iter.enumerate() { - if self.reach_limit { - let entry = self.hash_table.entry(key); - if let Some(entry) = entry { - let place = Into::::into(*entry.get()); - places.push((row, place)); - } - continue; - } - - match self.hash_table.insert_and_entry(key) { - Ok(mut entry) => { - let place = self.params.alloc_layout(&mut self.area); - places.push((row, place)); - - *entry.get_mut() = place.addr(); - - if let Some(limit) = self.params.limit { - current_len += 1; - if current_len >= limit { - self.reach_limit = true; - } - } - } - Err(entry) => { - let place = Into::::into(*entry.get()); - places.push((row, place)); - } - } - } - } - - places - } - - fn drop_states(&mut self) { - let aggregator_params = self.params.as_ref(); - let aggregate_functions = &aggregator_params.aggregate_functions; - let offsets_aggregate_states = &aggregator_params.offsets_aggregate_states; - - let functions = aggregate_functions - .iter() - .filter(|p| p.need_manual_drop_state()) - .collect::>(); - - let state_offsets = offsets_aggregate_states - .iter() - .enumerate() - .filter(|(idx, _)| aggregate_functions[*idx].need_manual_drop_state()) - .map(|(_, s)| *s) - .collect::>(); - - if !state_offsets.is_empty() { - for group_entity in self.hash_table.iter() { - let place = Into::::into(*group_entity.get()); - - for (function, state_offset) in functions.iter().zip(state_offsets.iter()) { - unsafe { function.drop_state(place.next(*state_offset)) } - } - } - } - - if HAS_AGG { - for (state_offset, function) in state_offsets.iter().zip(functions.iter()) { - let place = self.temp_place.next(*state_offset); - unsafe { function.drop_state(place) } - } - } - self.state_holders.clear(); - } -} - -impl Drop for BucketAggregator { - fn drop(&mut self) { - self.drop_states(); - } -} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_params.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_params.rs index 98b8cf7bd3583..0331bef9cb93a 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_params.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_params.rs @@ -17,16 +17,12 @@ use std::sync::Arc; use common_exception::Result; use common_expression::types::DataType; -use common_expression::DataBlock; use common_expression::DataSchemaRef; -use common_expression::HashMethodKind; use common_functions::aggregates::get_layout_offsets; use common_functions::aggregates::AggregateFunctionRef; use common_functions::aggregates::StateAddr; use common_sql::IndexType; -use crate::pipelines::processors::port::InputPort; -use crate::pipelines::processors::port::OutputPort; use crate::pipelines::processors::transforms::group_by::Area; pub struct AggregatorParams { @@ -92,30 +88,3 @@ impl AggregatorParams { .any(|f| 
f.name().contains("DistinctCombinator")) } } - -pub struct AggregatorTransformParams { - pub method: HashMethodKind, - pub transform_input_port: Arc, - pub transform_output_port: Arc, - pub aggregator_params: Arc, -} - -impl AggregatorTransformParams { - pub fn try_create( - transform_input_port: Arc, - transform_output_port: Arc, - aggregator_params: &Arc, - ) -> Result { - let group_cols = &aggregator_params.group_columns; - let schema_before_group_by = aggregator_params.input_schema.clone(); - let sample_block = DataBlock::empty_with_schema(schema_before_group_by); - let method = DataBlock::choose_hash_method(&sample_block, group_cols)?; - - Ok(AggregatorTransformParams { - method, - transform_input_port, - transform_output_port, - aggregator_params: aggregator_params.clone(), - }) - } -} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_partial.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_partial.rs deleted file mode 100644 index ec4c8744e8052..0000000000000 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_partial.rs +++ /dev/null @@ -1,305 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use common_exception::Result; -use common_expression::types::string::StringColumnBuilder; -use common_expression::BlockEntry; -use common_expression::Column; -use common_expression::DataBlock; -use common_functions::aggregates::StateAddr; -use common_functions::aggregates::StateAddrs; -use common_hashtable::HashtableEntryMutRefLike; -use common_hashtable::HashtableEntryRefLike; -use common_hashtable::HashtableLike; - -use super::estimated_key_size; -use crate::pipelines::processors::transforms::group_by::Area; -use crate::pipelines::processors::transforms::group_by::ArenaHolder; -use crate::pipelines::processors::transforms::group_by::HashMethodBounds; -use crate::pipelines::processors::transforms::group_by::KeysColumnBuilder; -use crate::pipelines::processors::transforms::transform_aggregator::Aggregator; -use crate::pipelines::processors::AggregatorParams; - -pub struct PartialAggregator { - pub states_dropped: bool, - - pub area: Option, - pub area_holder: Option, - pub method: Method, - pub hash_table: Method::HashTable, - pub params: Arc, - pub generated: bool, - pub input_rows: usize, - pub pass_state_to_final: bool, - pub two_level_mode: bool, -} - -impl PartialAggregator { - pub fn create( - method: Method, - params: Arc, - pass_state_to_final: bool, - ) -> Result { - let hash_table = method.create_hash_table()?; - Ok(Self { - params, - method, - hash_table, - area: Some(Area::create()), - area_holder: None, - states_dropped: false, - generated: false, - input_rows: 0, - pass_state_to_final, - two_level_mode: false, - }) - } - - #[inline(always)] - fn lookup_key(keys_iter: Method::HashKeyIter<'_>, hashtable: &mut Method::HashTable) { - unsafe { - for key in keys_iter { - let _ = hashtable.insert_and_entry(key); - 
} - } - } - - /// Allocate aggregation function state for each key(the same key can always get the same state) - #[inline(always)] - fn lookup_state( - area: &mut Area, - params: &Arc, - keys_iter: Method::HashKeyIter<'_>, - hashtable: &mut Method::HashTable, - ) -> StateAddrs { - let mut places = Vec::with_capacity(keys_iter.size_hint().0); - - unsafe { - for key in keys_iter { - match hashtable.insert_and_entry(key) { - Ok(mut entry) => { - let place = params.alloc_layout(area); - places.push(place); - *entry.get_mut() = place.addr(); - } - Err(entry) => { - let place = Into::::into(*entry.get()); - places.push(place); - } - } - } - } - - places - } - - // Block should be `convert_to_full`. - #[inline(always)] - fn aggregate_arguments( - block: &DataBlock, - params: &Arc, - ) -> Result>> { - let aggregate_functions_arguments = ¶ms.aggregate_functions_arguments; - let mut aggregate_arguments_columns = - Vec::with_capacity(aggregate_functions_arguments.len()); - for function_arguments in aggregate_functions_arguments { - let mut function_arguments_column = Vec::with_capacity(function_arguments.len()); - - for argument_index in function_arguments { - // Unwrap safety: chunk has been `convert_to_full`. - let argument_column = block - .get_by_offset(*argument_index) - .value - .as_column() - .unwrap(); - function_arguments_column.push(argument_column.clone()); - } - - aggregate_arguments_columns.push(function_arguments_column); - } - - Ok(aggregate_arguments_columns) - } - - #[inline(always)] - #[allow(clippy::ptr_arg)] // &[StateAddr] slower than &StateAddrs ~20% - fn execute( - params: &Arc, - block: &DataBlock, - places: &StateAddrs, - ) -> Result<()> { - let aggregate_functions = ¶ms.aggregate_functions; - let offsets_aggregate_states = ¶ms.offsets_aggregate_states; - let aggregate_arguments_columns = Self::aggregate_arguments(block, params)?; - - // This can beneficial for the case of dereferencing - // This will help improve the performance ~hundreds of megabits per second - let aggr_arg_columns_slice = &aggregate_arguments_columns; - - for index in 0..aggregate_functions.len() { - let rows = block.num_rows(); - let function = &aggregate_functions[index]; - let state_offset = offsets_aggregate_states[index]; - let function_arguments = &aggr_arg_columns_slice[index]; - function.accumulate_keys(places, state_offset, function_arguments, rows)?; - } - - Ok(()) - } - - #[inline(always)] - pub fn group_columns<'a>(block: &'a DataBlock, indices: &[usize]) -> Vec<&'a BlockEntry> { - indices - .iter() - .map(|&index| block.get_by_offset(index)) - .collect::>() - } - - pub fn try_holder_state(&mut self) { - let area = self.area.take(); - if area.is_some() { - self.area_holder = Some(ArenaHolder::create(area)); - } - } - - #[inline(always)] - fn generate_data(&mut self) -> Result> { - if self.generated || self.hash_table.len() == 0 { - return Ok(vec![]); - } - self.generated = true; - - let state_groups_len = self.hash_table.len(); - let aggregator_params = self.params.as_ref(); - let funcs = &aggregator_params.aggregate_functions; - let aggr_len = funcs.len(); - let offsets_aggregate_states = &aggregator_params.offsets_aggregate_states; - - // Builders. 
- let mut state_builders = (0..aggr_len) - .map(|_| StringColumnBuilder::with_capacity(state_groups_len, state_groups_len * 4)) - .collect::>(); - - let value_size = estimated_key_size(&self.hash_table); - let mut group_key_builder = self - .method - .keys_column_builder(state_groups_len, value_size); - - // TODO use batch - for group_entity in self.hash_table.iter() { - let place = Into::::into(*group_entity.get()); - - if HAS_AGG { - for (idx, func) in funcs.iter().enumerate() { - let arg_place = place.next(offsets_aggregate_states[idx]); - func.serialize(arg_place, &mut state_builders[idx].data)?; - state_builders[idx].commit_row(); - } - } - group_key_builder.append_value(group_entity.key()); - } - - let mut columns = Vec::with_capacity(state_builders.len() + 1); - - if HAS_AGG { - for builder in state_builders.into_iter() { - columns.push(Column::String(builder.build())); - } - } - - let group_key_col = group_key_builder.finish(); - columns.push(group_key_col); - Ok(vec![DataBlock::new_from_columns(columns)]) - } -} - -impl Aggregator - for PartialAggregator -{ - const NAME: &'static str = "GroupByPartialTransform"; - - fn consume(&mut self, block: DataBlock) -> Result<()> { - self.input_rows += block.num_rows(); - let block = block.convert_to_full(); - // 1.1 and 1.2. - let group_columns = Self::group_columns(&block, &self.params.group_columns); - let group_columns = group_columns - .iter() - .map(|c| (c.value.as_column().unwrap().clone(), c.data_type.clone())) - .collect::>(); - let group_keys_state = self - .method - .build_keys_state(&group_columns, block.num_rows())?; - - let group_keys_iter = self.method.build_keys_iter(&group_keys_state)?; - - if HAS_AGG { - let area = self.area.as_mut().unwrap(); - let places = - Self::lookup_state(area, &self.params, group_keys_iter, &mut self.hash_table); - Self::execute(&self.params, &block, &places) - } else { - Self::lookup_key(group_keys_iter, &mut self.hash_table); - Ok(()) - } - } - - fn generate(&mut self) -> Result> { - self.generate_data() - } -} - -impl PartialAggregator { - pub fn drop_states(&mut self) { - if !self.states_dropped { - let aggregator_params = self.params.as_ref(); - let aggregate_functions = &aggregator_params.aggregate_functions; - let offsets_aggregate_states = &aggregator_params.offsets_aggregate_states; - - let functions = aggregate_functions - .iter() - .filter(|p| p.need_manual_drop_state()) - .collect::>(); - - let states = offsets_aggregate_states - .iter() - .enumerate() - .filter(|(idx, _)| aggregate_functions[*idx].need_manual_drop_state()) - .map(|(_, s)| *s) - .collect::>(); - - if !states.is_empty() { - for group_entity in self.hash_table.iter() { - let place = Into::::into(*group_entity.get()); - - for (function, state_offset) in functions.iter().zip(states.iter()) { - unsafe { function.drop_state(place.next(*state_offset)) } - } - } - } - drop(self.area.take()); - drop(self.area_holder.take()); - self.hash_table.clear(); - self.states_dropped = true; - } - } -} - -impl Drop for PartialAggregator { - fn drop(&mut self) { - self.drop_states(); - } -} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_partitioned.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_partitioned.rs deleted file mode 100644 index 81a3af1da54cf..0000000000000 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_partitioned.rs +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright 2022 Datafuse Labs. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::time::Instant; - -use common_exception::ErrorCode; -use common_exception::Result; -use common_expression::types::string::StringColumnBuilder; -use common_expression::types::DataType; -use common_expression::BlockEntry; -use common_expression::Column; -use common_expression::DataBlock; -use common_expression::Value; -use common_functions::aggregates::StateAddr; -use common_hashtable::HashtableEntryMutRefLike; -use common_hashtable::HashtableEntryRefLike; -use common_hashtable::HashtableLike; -use tracing::info; - -use super::estimated_key_size; -use crate::pipelines::processors::transforms::aggregator::aggregate_info::AggregateInfo; -use crate::pipelines::processors::transforms::aggregator::AggregateHashStateInfo; -use crate::pipelines::processors::transforms::aggregator::PartialAggregator; -use crate::pipelines::processors::transforms::group_by::HashMethodBounds; -use crate::pipelines::processors::transforms::group_by::KeysColumnBuilder; -use crate::pipelines::processors::transforms::group_by::PartitionedHashMethod; -use crate::pipelines::processors::transforms::group_by::PolymorphicKeysHelper; -use crate::pipelines::processors::transforms::transform_aggregator::Aggregator; -use crate::pipelines::processors::AggregatorParams; - -pub trait PartitionedAggregatorLike -where Self: Aggregator + Send -{ - const SUPPORT_PARTITION: bool; - - type PartitionedAggregator: Aggregator; - - fn get_state_cardinality(&self) -> usize { - 0 - } - - fn get_state_bytes(&self) -> usize { - 0 - } - - fn convert_partitioned(self) -> Result> { - Err(ErrorCode::Unimplemented(format!( - "Partitioned aggregator is unimplemented for {}", - Self::NAME - ))) - } - - fn convert_partitioned_block(_agg: &mut Self::PartitionedAggregator) -> Result> { - Err(ErrorCode::Unimplemented(format!( - "Partitioned aggregator is unimplemented for {}", - Self::NAME - ))) - } -} - -impl PartitionedAggregatorLike - for PartialAggregator -{ - const SUPPORT_PARTITION: bool = Method::SUPPORT_PARTITIONED; - - type PartitionedAggregator = PartialAggregator>; - - fn get_state_cardinality(&self) -> usize { - self.hash_table.len() - } - - fn get_state_bytes(&self) -> usize { - self.hash_table.bytes_len() - } - - // PartialAggregator -> PartitionedAggregator> - fn convert_partitioned(mut self) -> Result> { - let instant = Instant::now(); - let method = self.method.clone(); - let two_level_method = PartitionedHashMethod::create(method); - let mut two_level_hashtable = two_level_method.create_hash_table()?; - - unsafe { - for item in self.hash_table.iter() { - match two_level_hashtable.insert_and_entry(item.key()) { - Ok(mut entry) => { - *entry.get_mut() = *item.get(); - } - Err(mut entry) => { - *entry.get_mut() = *item.get(); - } - }; - } - } - - info!( - "Convert to Partitioned aggregator elapsed: {:?}", - instant.elapsed() - ); - - self.states_dropped = true; - Ok(PartitionedAggregator:: { - inner: PartialAggregator::> { - area: self.area.take(), - area_holder: None, - 
params: self.params.clone(), - states_dropped: false, - method: two_level_method, - hash_table: two_level_hashtable, - generated: false, - input_rows: self.input_rows, - pass_state_to_final: self.pass_state_to_final, - two_level_mode: true, - }, - }) - } - - fn convert_partitioned_block(agg: &mut Self::PartitionedAggregator) -> Result> { - let mut data_blocks = Vec::with_capacity(256); - - fn clear_table>(table: &mut T, params: &AggregatorParams) { - let aggregate_functions = ¶ms.aggregate_functions; - let offsets_aggregate_states = ¶ms.offsets_aggregate_states; - - let functions = aggregate_functions - .iter() - .filter(|p| p.need_manual_drop_state()) - .collect::>(); - - let states = offsets_aggregate_states - .iter() - .enumerate() - .filter(|(idx, _)| aggregate_functions[*idx].need_manual_drop_state()) - .map(|(_, s)| *s) - .collect::>(); - - if !states.is_empty() { - for group_entity in table.iter() { - let place = Into::::into(*group_entity.get()); - - for (function, state_offset) in functions.iter().zip(states.iter()) { - unsafe { function.drop_state(place.next(*state_offset)) } - } - } - } - - table.clear(); - } - - for (bucket, inner_table) in agg.hash_table.iter_tables_mut().enumerate() { - if inner_table.len() == 0 { - continue; - } - - if agg.pass_state_to_final { - let table = std::mem::replace(inner_table, agg.method.method.create_hash_table()?); - let rows = table.len(); - agg.try_holder_state(); - let meta = AggregateHashStateInfo::create( - bucket, - Box::new(table), - agg.area_holder.clone(), - ); - - let block = DataBlock::new_with_meta(vec![], rows, Some(meta)); - return Ok(vec![block]); - } - - let capacity = inner_table.len(); - let iterator = inner_table.iter(); - - let aggregator_params = agg.params.as_ref(); - let funcs = &aggregator_params.aggregate_functions; - let aggr_len = funcs.len(); - let offsets_aggregate_states = &aggregator_params.offsets_aggregate_states; - - // Builders. 
- let mut state_builders: Vec = (0..aggr_len) - .map(|_| StringColumnBuilder::with_capacity(capacity, capacity * 4)) - .collect(); - - let value_size = estimated_key_size(inner_table); - let mut group_key_builder = agg.method.keys_column_builder(capacity, value_size); - - for group_entity in iterator { - let place = Into::::into(*group_entity.get()); - - for (idx, func) in funcs.iter().enumerate() { - let arg_place = place.next(offsets_aggregate_states[idx]); - func.serialize(arg_place, &mut state_builders[idx].data)?; - state_builders[idx].commit_row(); - } - - group_key_builder.append_value(group_entity.key()); - } - - let mut columns = Vec::with_capacity(state_builders.len() + 1); - for builder in state_builders.into_iter() { - let col = builder.build(); - columns.push(BlockEntry { - value: Value::Column(Column::String(col)), - data_type: DataType::String, - }); - } - - let col = group_key_builder.finish(); - let num_rows = col.len(); - let group_key_type = col.data_type(); - - columns.push(BlockEntry { - value: Value::Column(col), - data_type: group_key_type, - }); - - data_blocks.push(DataBlock::new_with_meta( - columns, - num_rows, - Some(AggregateInfo::create(bucket as isize)), - )); - - clear_table(inner_table, &agg.params); - - // streaming return Partitioned blocks by bucket - return Ok(data_blocks); - } - - if !agg.pass_state_to_final { - drop(agg.area.take()); - drop(agg.area_holder.take()); - } - - Ok(data_blocks) - } -} - -// Example: PartitionedAggregator> -> -// PartitionedAggregator { -// inner: PartialAggregator> -// } -pub struct PartitionedAggregator { - inner: T::PartitionedAggregator, -} - -impl Aggregator for PartitionedAggregator { - const NAME: &'static str = "PartitionedAggregator"; - - #[inline(always)] - fn consume(&mut self, data: DataBlock) -> Result<()> { - self.inner.consume(data) - } - - #[inline(always)] - fn generate(&mut self) -> Result> { - T::convert_partitioned_block(&mut self.inner) - } -} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs index 5c595c7fb2de0..91698e2089c66 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs @@ -12,26 +12,34 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+mod aggregate_cell; mod aggregate_exchange_sorting; -mod aggregate_hashstate_info; -mod aggregate_info; -mod aggregator_final_parallel; +mod aggregate_meta; mod aggregator_params; -mod aggregator_partial; -mod aggregator_partitioned; -mod aggregator_single_key; +mod serde; +mod transform_aggregate_final; +mod transform_aggregate_partial; +mod transform_group_by_final; +mod transform_group_by_partial; +mod transform_partition_bucket; +mod transform_single_key; mod utils; +pub use aggregate_cell::HashTableCell; +pub use aggregate_cell::PartitionedHashTableDropper; pub use aggregate_exchange_sorting::AggregateExchangeSorting; -pub use aggregate_hashstate_info::AggregateHashStateInfo; -pub use aggregate_info::AggregateInfo; -pub use aggregate_info::OverflowInfo; -pub use aggregator_final_parallel::BucketAggregator; pub use aggregator_params::AggregatorParams; -pub use aggregator_params::AggregatorTransformParams; -pub use aggregator_partial::PartialAggregator; -pub use aggregator_partitioned::PartitionedAggregator; -pub use aggregator_partitioned::PartitionedAggregatorLike; -pub use aggregator_single_key::FinalSingleStateAggregator; -pub use aggregator_single_key::PartialSingleStateAggregator; +pub use transform_aggregate_final::TransformFinalAggregate; +pub use transform_aggregate_partial::TransformPartialAggregate; +pub use transform_group_by_final::TransformFinalGroupBy; +pub use transform_group_by_partial::TransformPartialGroupBy; +pub use transform_partition_bucket::efficiently_memory_final_aggregator; +pub use transform_partition_bucket::TransformPartitionBucket; +pub use transform_single_key::FinalSingleStateAggregator; +pub use transform_single_key::PartialSingleStateAggregator; pub use utils::*; + +pub use self::serde::TransformAggregateDeserializer; +pub use self::serde::TransformAggregateSerializer; +pub use self::serde::TransformGroupByDeserializer; +pub use self::serde::TransformGroupBySerializer; diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/mod.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/mod.rs new file mode 100644 index 0000000000000..a11fd3bdb2d25 --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/mod.rs @@ -0,0 +1,23 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +mod serde_meta; +mod transform_deserializer; +mod transform_serializer; + +pub use serde_meta::AggregateSerdeMeta; +pub use transform_deserializer::TransformAggregateDeserializer; +pub use transform_deserializer::TransformGroupByDeserializer; +pub use transform_serializer::TransformAggregateSerializer; +pub use transform_serializer::TransformGroupBySerializer; diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_info.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs similarity index 62% rename from src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_info.rs rename to src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs index 25d021dc784b7..18a5516cf342f 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_info.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs @@ -1,4 +1,4 @@ -// Copyright 2022 Datafuse Labs. +// Copyright 2023 Datafuse Labs. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,51 +13,36 @@ // limitations under the License. use std::any::Any; -use std::collections::HashMap; use common_expression::BlockMetaInfo; +use common_expression::BlockMetaInfoDowncast; use common_expression::BlockMetaInfoPtr; #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)] -pub struct OverflowInfo { - pub temporary_path: String, - // bucket_id -> (offset, length) - pub bucket_info: HashMap, -} - -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)] -pub struct AggregateInfo { +pub struct AggregateSerdeMeta { pub bucket: isize, - pub overflow: Option, } -impl AggregateInfo { +impl AggregateSerdeMeta { pub fn create(bucket: isize) -> BlockMetaInfoPtr { - Box::new(AggregateInfo { - bucket, - overflow: None, - }) + Box::new(AggregateSerdeMeta { bucket }) } } -#[typetag::serde(name = "aggregate_info")] -impl BlockMetaInfo for AggregateInfo { +#[typetag::serde(name = "aggregate_serde")] +impl BlockMetaInfo for AggregateSerdeMeta { fn as_any(&self) -> &dyn Any { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - - fn clone_self(&self) -> Box { - Box::new(self.clone()) - } - fn equals(&self, info: &Box) -> bool { - match info.as_any().downcast_ref::() { + match AggregateSerdeMeta::downcast_ref_from(info) { None => false, Some(other) => self == other, } } + + fn clone_self(&self) -> Box { + Box::new(self.clone()) + } } diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs new file mode 100644 index 0000000000000..cc125f2611a63 --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs @@ -0,0 +1,100 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use std::any::Any; +use std::marker::PhantomData; +use std::sync::Arc; + +use common_exception::Result; +use common_expression::BlockMetaInfoDowncast; +use common_expression::DataBlock; +use common_pipeline_core::processors::port::InputPort; +use common_pipeline_core::processors::port::OutputPort; +use common_pipeline_core::processors::processor::Event; +use common_pipeline_core::processors::processor::ProcessorPtr; +use common_pipeline_core::processors::Processor; + +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; +use crate::pipelines::processors::transforms::aggregator::serde::serde_meta::AggregateSerdeMeta; +use crate::pipelines::processors::transforms::group_by::HashMethodBounds; + +pub struct TransformDeserializer { + input: Arc, + output: Arc, + _phantom: PhantomData<(Method, V)>, +} + +impl TransformDeserializer { + pub fn try_create(input: Arc, output: Arc) -> Result { + Ok(ProcessorPtr::create(Box::new(TransformDeserializer::< + Method, + V, + > { + input, + output, + _phantom: Default::default(), + }))) + } +} + +#[async_trait::async_trait] +impl Processor for TransformDeserializer +where + Method: HashMethodBounds, + V: Send + Sync + 'static, +{ + fn name(&self) -> String { + String::from("TransformAggregateDeserializer") + } + + fn as_any(&mut self) -> &mut dyn Any { + self + } + + fn event(&mut self) -> Result { + if self.output.is_finished() { + self.input.finish(); + return Ok(Event::Finished); + } + + if !self.output.can_push() { + self.input.set_not_need_data(); + return Ok(Event::NeedConsume); + } + + if self.input.has_data() { + let mut data_block = self.input.pull_data().unwrap()?; + let block_meta = data_block + .take_meta() + .and_then(AggregateSerdeMeta::downcast_from) + .unwrap(); + + self.output.push_data(Ok(DataBlock::empty_with_meta( + AggregateMeta::::create_serialized(block_meta.bucket, data_block), + ))); + return Ok(Event::NeedConsume); + } + + if self.input.is_finished() { + self.output.finish(); + return Ok(Event::Finished); + } + + self.input.set_need_data(); + Ok(Event::NeedData) + } +} + +pub type TransformGroupByDeserializer = TransformDeserializer; +pub type TransformAggregateDeserializer = TransformDeserializer; diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_serializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_serializer.rs new file mode 100644 index 0000000000000..cedcdc864d101 --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_serializer.rs @@ -0,0 +1,148 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use common_exception::Result; +use common_expression::types::string::StringColumnBuilder; +use common_expression::Column; +use common_expression::DataBlock; +use common_functions::aggregates::StateAddr; +use common_hashtable::HashtableEntryRefLike; +use common_hashtable::HashtableLike; +use common_pipeline_core::processors::port::InputPort; +use common_pipeline_core::processors::port::OutputPort; +use common_pipeline_core::processors::processor::ProcessorPtr; +use common_pipeline_transforms::processors::transforms::BlockMetaTransform; +use common_pipeline_transforms::processors::transforms::BlockMetaTransformer; + +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; +use crate::pipelines::processors::transforms::aggregator::estimated_key_size; +use crate::pipelines::processors::transforms::aggregator::serde::serde_meta::AggregateSerdeMeta; +use crate::pipelines::processors::transforms::group_by::HashMethodBounds; +use crate::pipelines::processors::transforms::group_by::KeysColumnBuilder; +use crate::pipelines::processors::AggregatorParams; + +pub struct TransformGroupBySerializer { + method: Method, +} + +impl TransformGroupBySerializer { + pub fn try_create( + input: Arc, + output: Arc, + method: Method, + ) -> Result { + Ok(ProcessorPtr::create(BlockMetaTransformer::create( + input, + output, + TransformGroupBySerializer { method }, + ))) + } +} + +impl BlockMetaTransform> for TransformGroupBySerializer +where Method: HashMethodBounds +{ + const NAME: &'static str = "TransformGroupBySerializer"; + + fn transform(&mut self, meta: AggregateMeta) -> Result { + match meta { + AggregateMeta::Partitioned { .. } => unreachable!(), + AggregateMeta::Serialized(_) => unreachable!(), + AggregateMeta::HashTable(payload) => { + let keys_len = payload.cell.hashtable.len(); + let value_size = estimated_key_size(&payload.cell.hashtable); + let mut group_key_builder = self.method.keys_column_builder(keys_len, value_size); + + for group_entity in payload.cell.hashtable.iter() { + group_key_builder.append_value(group_entity.key()); + } + + let data_block = DataBlock::new_from_columns(vec![group_key_builder.finish()]); + data_block.add_meta(Some(AggregateSerdeMeta::create(payload.bucket))) + } + } + } +} + +pub struct TransformAggregateSerializer { + method: Method, + params: Arc, +} + +impl TransformAggregateSerializer { + pub fn try_create( + input: Arc, + output: Arc, + method: Method, + params: Arc, + ) -> Result { + Ok(ProcessorPtr::create(BlockMetaTransformer::create( + input, + output, + TransformAggregateSerializer { method, params }, + ))) + } +} + +impl BlockMetaTransform> + for TransformAggregateSerializer +where Method: HashMethodBounds +{ + const NAME: &'static str = "TransformAggregateSerializer"; + + fn transform(&mut self, meta: AggregateMeta) -> Result { + match meta { + AggregateMeta::Partitioned { .. } => unreachable!(), + AggregateMeta::Serialized(_) => unreachable!(), + AggregateMeta::HashTable(payload) => { + let keys_len = payload.cell.hashtable.len(); + let value_size = estimated_key_size(&payload.cell.hashtable); + + let funcs = &self.params.aggregate_functions; + let offsets_aggregate_states = &self.params.offsets_aggregate_states; + + // Builders. 
+ let mut state_builders = (0..funcs.len()) + .map(|_| StringColumnBuilder::with_capacity(keys_len, keys_len * 4)) + .collect::>(); + + let mut group_key_builder = self.method.keys_column_builder(keys_len, value_size); + + for group_entity in payload.cell.hashtable.iter() { + let place = Into::::into(*group_entity.get()); + + for (idx, func) in funcs.iter().enumerate() { + let arg_place = place.next(offsets_aggregate_states[idx]); + func.serialize(arg_place, &mut state_builders[idx].data)?; + state_builders[idx].commit_row(); + } + + group_key_builder.append_value(group_entity.key()); + } + + let mut columns = Vec::with_capacity(state_builders.len() + 1); + + for builder in state_builders.into_iter() { + columns.push(Column::String(builder.build())); + } + + columns.push(group_key_builder.finish()); + let data_block = DataBlock::new_from_columns(columns); + data_block.add_meta(Some(AggregateSerdeMeta::create(payload.bucket))) + } + } + } +} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs new file mode 100644 index 0000000000000..c16189f47fed3 --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs @@ -0,0 +1,245 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::borrow::BorrowMut; +use std::sync::Arc; + +use common_exception::ErrorCode; +use common_exception::Result; +use common_expression::ColumnBuilder; +use common_expression::DataBlock; +use common_functions::aggregates::StateAddr; +use common_hashtable::HashtableEntryMutRefLike; +use common_hashtable::HashtableEntryRefLike; +use common_hashtable::HashtableLike; +use common_pipeline_core::processors::port::InputPort; +use common_pipeline_core::processors::port::OutputPort; +use common_pipeline_core::processors::processor::ProcessorPtr; +use common_pipeline_transforms::processors::transforms::BlockMetaTransform; +use common_pipeline_transforms::processors::transforms::BlockMetaTransformer; + +use crate::pipelines::processors::transforms::aggregator::aggregate_cell::AggregateHashTableDropper; +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; +use crate::pipelines::processors::transforms::aggregator::estimated_key_size; +use crate::pipelines::processors::transforms::group_by::GroupColumnsBuilder; +use crate::pipelines::processors::transforms::group_by::HashMethodBounds; +use crate::pipelines::processors::transforms::group_by::KeysColumnIter; +use crate::pipelines::processors::transforms::HashTableCell; +use crate::pipelines::processors::AggregatorParams; + +pub struct TransformFinalAggregate { + method: Method, + params: Arc, +} + +impl TransformFinalAggregate { + pub fn try_create( + input: Arc, + output: Arc, + method: Method, + params: Arc, + ) -> Result { + Ok(ProcessorPtr::create(BlockMetaTransformer::create( + input, + output, + TransformFinalAggregate:: { method, params }, + ))) + } +} + +impl BlockMetaTransform> for TransformFinalAggregate +where Method: HashMethodBounds +{ + const NAME: &'static str = "TransformFinalAggregate"; + + fn transform(&mut self, meta: AggregateMeta) -> Result { + if let AggregateMeta::Partitioned { bucket, data } = meta { + let mut reach_limit = false; + let hashtable = self.method.create_hash_table::()?; + let _dropper = AggregateHashTableDropper::create(self.params.clone()); + let mut hash_cell = HashTableCell::::create(hashtable, _dropper); + let temp_place = self.params.alloc_layout(&mut hash_cell.arena); + hash_cell.temp_values.push(temp_place.addr()); + + for bucket_data in data { + match bucket_data { + AggregateMeta::Partitioned { .. 
} => unreachable!(), + AggregateMeta::Serialized(payload) => { + debug_assert!(bucket == payload.bucket); + + let aggregate_function_len = self.params.aggregate_functions.len(); + + let column = payload.get_group_by_column(); + let keys_iter = self.method.keys_iter_from_column(column)?; + + // first state places of current block + let places = { + let keys_iter = keys_iter.iter(); + let (len, _) = keys_iter.size_hint(); + let mut places = Vec::with_capacity(len); + + let mut current_len = hash_cell.hashtable.len(); + unsafe { + for (row, key) in keys_iter.enumerate() { + if reach_limit { + let entry = hash_cell.hashtable.entry(key); + if let Some(entry) = entry { + let place = Into::::into(*entry.get()); + places.push((row, place)); + } + continue; + } + + match hash_cell.hashtable.insert_and_entry(key) { + Ok(mut entry) => { + let place = + self.params.alloc_layout(&mut hash_cell.arena); + places.push((row, place)); + + *entry.get_mut() = place.addr(); + + if let Some(limit) = self.params.limit { + current_len += 1; + if current_len >= limit { + reach_limit = true; + } + } + } + Err(entry) => { + let place = Into::::into(*entry.get()); + places.push((row, place)); + } + } + } + } + + places + }; + + let states_columns = (0..aggregate_function_len) + .map(|i| payload.data_block.get_by_offset(i)) + .collect::>(); + let mut states_binary_columns = Vec::with_capacity(states_columns.len()); + + for agg in states_columns.iter().take(aggregate_function_len) { + let aggr_column = + agg.value.as_column().unwrap().as_string().ok_or_else(|| { + ErrorCode::IllegalDataType(format!( + "Aggregation column should be StringType, but got {:?}", + agg.value + )) + })?; + states_binary_columns.push(aggr_column); + } + + let aggregate_functions = &self.params.aggregate_functions; + let offsets_aggregate_states = &self.params.offsets_aggregate_states; + + for (row, place) in places.iter() { + for (idx, aggregate_function) in aggregate_functions.iter().enumerate() + { + let final_place = place.next(offsets_aggregate_states[idx]); + let state_place = temp_place.next(offsets_aggregate_states[idx]); + + let mut data = + unsafe { states_binary_columns[idx].index_unchecked(*row) }; + aggregate_function.deserialize(state_place, &mut data)?; + aggregate_function.merge(final_place, state_place)?; + } + } + } + AggregateMeta::HashTable(payload) => unsafe { + debug_assert!(bucket == payload.bucket); + + let aggregate_functions = &self.params.aggregate_functions; + let offsets_aggregate_states = &self.params.offsets_aggregate_states; + + for entry in payload.cell.hashtable.iter() { + let place = match hash_cell.hashtable.insert(entry.key()) { + Err(place) => StateAddr::new(*place), + Ok(entry) => { + let place = self.params.alloc_layout(&mut hash_cell.arena); + entry.write(place.addr()); + place + } + }; + + let old_place = StateAddr::new(*entry.get()); + for (idx, aggregate_function) in aggregate_functions.iter().enumerate() + { + let final_place = place.next(offsets_aggregate_states[idx]); + let state_place = old_place.next(offsets_aggregate_states[idx]); + aggregate_function.merge(final_place, state_place)?; + } + } + }, + } + } + + let keys_len = hash_cell.hashtable.len(); + let value_size = estimated_key_size(&hash_cell.hashtable); + + let mut group_columns_builder = + self.method + .group_columns_builder(keys_len, value_size, &self.params); + + let aggregate_functions = &self.params.aggregate_functions; + let offsets_aggregate_states = &self.params.offsets_aggregate_states; + + let mut aggregates_column_builder = { 
+ let mut values = vec![]; + for aggregate_function in aggregate_functions { + let data_type = aggregate_function.return_type()?; + let builder = ColumnBuilder::with_capacity(&data_type, keys_len); + values.push(builder) + } + values + }; + + let mut places = Vec::with_capacity(keys_len); + for group_entity in hash_cell.hashtable.iter() { + places.push(StateAddr::new(*group_entity.get())); + group_columns_builder.append_value(group_entity.key()); + } + + for (idx, aggregate_function) in aggregate_functions.iter().enumerate() { + let builder = aggregates_column_builder[idx].borrow_mut(); + + if idx > 0 { + for place in places.iter_mut() { + *place = place.next( + offsets_aggregate_states[idx] - offsets_aggregate_states[idx - 1], + ); + } + } + aggregate_function.batch_merge_result(&places, builder)?; + } + + // Build final state block. + let mut columns = aggregates_column_builder + .into_iter() + .map(|builder| builder.build()) + .collect::>(); + + let group_columns = group_columns_builder.finish()?; + columns.extend_from_slice(&group_columns); + + return Ok(DataBlock::new_from_columns(columns)); + } + + Err(ErrorCode::Internal( + "TransformFinalAggregate only recv AggregateMeta::Partitioned", + )) + } +} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs new file mode 100644 index 0000000000000..52000529440b0 --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs @@ -0,0 +1,273 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; +use std::vec; + +use common_catalog::table_context::TableContext; +use common_exception::ErrorCode; +use common_exception::Result; +use common_expression::Column; +use common_expression::DataBlock; +use common_functions::aggregates::StateAddr; +use common_functions::aggregates::StateAddrs; +use common_hashtable::HashtableEntryMutRefLike; +use common_hashtable::HashtableLike; +use common_pipeline_core::processors::port::InputPort; +use common_pipeline_core::processors::port::OutputPort; +use common_pipeline_core::processors::Processor; +use common_pipeline_transforms::processors::transforms::AccumulatingTransform; +use common_pipeline_transforms::processors::transforms::AccumulatingTransformer; + +use crate::pipelines::processors::transforms::aggregator::aggregate_cell::AggregateHashTableDropper; +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; +use crate::pipelines::processors::transforms::group_by::HashMethodBounds; +use crate::pipelines::processors::transforms::group_by::PartitionedHashMethod; +use crate::pipelines::processors::transforms::HashTableCell; +use crate::pipelines::processors::transforms::PartitionedHashTableDropper; +use crate::pipelines::processors::AggregatorParams; +use crate::sessions::QueryContext; + +#[allow(clippy::enum_variant_names)] +enum HashTable { + MovedOut, + HashTable(HashTableCell), + PartitionedHashTable(HashTableCell, usize>), +} + +impl Default for HashTable { + fn default() -> Self { + Self::MovedOut + } +} + +struct GroupBySettings { + convert_threshold: usize, + spilling_bytes_threshold_per_proc: usize, +} + +impl TryFrom> for GroupBySettings { + type Error = ErrorCode; + + fn try_from(ctx: Arc) -> std::result::Result { + let settings = ctx.get_settings(); + let convert_threshold = settings.get_group_by_two_level_threshold()? as usize; + Ok(GroupBySettings { + convert_threshold, + spilling_bytes_threshold_per_proc: usize::MAX, + }) + } +} + +// SELECT column_name, agg(xxx) FROM table_name GROUP BY column_name +pub struct TransformPartialAggregate { + method: Method, + settings: GroupBySettings, + hash_table: HashTable, + + params: Arc, +} + +impl TransformPartialAggregate { + #[allow(dead_code)] + pub fn try_create( + ctx: Arc, + method: Method, + input: Arc, + output: Arc, + params: Arc, + ) -> Result> { + let hashtable = method.create_hash_table()?; + let _dropper = AggregateHashTableDropper::create(params.clone()); + let hashtable = HashTableCell::create(hashtable, _dropper); + + let hash_table = match !Method::SUPPORT_PARTITIONED || !params.has_distinct_combinator() { + true => HashTable::HashTable(hashtable), + false => HashTable::PartitionedHashTable(PartitionedHashMethod::convert_hashtable( + &method, hashtable, + )?), + }; + + Ok(AccumulatingTransformer::create( + input, + output, + TransformPartialAggregate:: { + method, + params, + hash_table, + settings: GroupBySettings::try_from(ctx)?, + }, + )) + } + + // Block should be `convert_to_full`. + #[inline(always)] + fn aggregate_arguments( + block: &DataBlock, + params: &Arc, + ) -> Result>> { + let aggregate_functions_arguments = ¶ms.aggregate_functions_arguments; + let mut aggregate_arguments_columns = + Vec::with_capacity(aggregate_functions_arguments.len()); + for function_arguments in aggregate_functions_arguments { + let mut function_arguments_column = Vec::with_capacity(function_arguments.len()); + + for argument_index in function_arguments { + // Unwrap safety: chunk has been `convert_to_full`. 
+ let argument_column = block + .get_by_offset(*argument_index) + .value + .as_column() + .unwrap(); + function_arguments_column.push(argument_column.clone()); + } + + aggregate_arguments_columns.push(function_arguments_column); + } + + Ok(aggregate_arguments_columns) + } + + #[inline(always)] + #[allow(clippy::ptr_arg)] // &[StateAddr] slower than &StateAddrs ~20% + fn execute( + params: &Arc, + block: &DataBlock, + places: &StateAddrs, + ) -> Result<()> { + let aggregate_functions = ¶ms.aggregate_functions; + let offsets_aggregate_states = ¶ms.offsets_aggregate_states; + let aggregate_arguments_columns = Self::aggregate_arguments(block, params)?; + + // This can beneficial for the case of dereferencing + // This will help improve the performance ~hundreds of megabits per second + let aggr_arg_columns_slice = &aggregate_arguments_columns; + + for index in 0..aggregate_functions.len() { + let rows = block.num_rows(); + let function = &aggregate_functions[index]; + let state_offset = offsets_aggregate_states[index]; + let function_arguments = &aggr_arg_columns_slice[index]; + function.accumulate_keys(places, state_offset, function_arguments, rows)?; + } + + Ok(()) + } + + fn execute_one_block(&mut self, block: DataBlock) -> Result<()> { + let block = block.convert_to_full(); + + let group_columns = self + .params + .group_columns + .iter() + .map(|&index| block.get_by_offset(index)) + .collect::>(); + + let group_columns = group_columns + .iter() + .map(|c| (c.value.as_column().unwrap().clone(), c.data_type.clone())) + .collect::>(); + + unsafe { + let rows_num = block.num_rows(); + let state = self.method.build_keys_state(&group_columns, rows_num)?; + + match &mut self.hash_table { + HashTable::MovedOut => unreachable!(), + HashTable::HashTable(hashtable) => { + let mut places = Vec::with_capacity(rows_num); + + for key in self.method.build_keys_iter(&state)? { + places.push(match hashtable.hashtable.insert_and_entry(key) { + Err(entry) => Into::::into(*entry.get()), + Ok(mut entry) => { + let place = self.params.alloc_layout(&mut hashtable.arena); + *entry.get_mut() = place.addr(); + place + } + }) + } + + Self::execute(&self.params, &block, &places) + } + HashTable::PartitionedHashTable(hashtable) => { + let mut places = Vec::with_capacity(rows_num); + + for key in self.method.build_keys_iter(&state)? 
{ + places.push(match hashtable.hashtable.insert_and_entry(key) { + Err(entry) => Into::::into(*entry.get()), + Ok(mut entry) => { + let place = self.params.alloc_layout(&mut hashtable.arena); + *entry.get_mut() = place.addr(); + place + } + }) + } + + Self::execute(&self.params, &block, &places) + } + } + } + } +} + +impl AccumulatingTransform for TransformPartialAggregate { + const NAME: &'static str = "TransformPartialAggregate"; + + fn transform(&mut self, block: DataBlock) -> Result> { + self.execute_one_block(block)?; + + #[allow(clippy::collapsible_if)] + if Method::SUPPORT_PARTITIONED { + if matches!(&self.hash_table, HashTable::HashTable(cell) + if cell.hashtable.len() >= self.settings.convert_threshold || + cell.hashtable.bytes_len() >= self.settings.spilling_bytes_threshold_per_proc + ) { + if let HashTable::HashTable(cell) = std::mem::take(&mut self.hash_table) { + self.hash_table = HashTable::PartitionedHashTable( + PartitionedHashMethod::convert_hashtable(&self.method, cell)?, + ); + } + } + } + + Ok(vec![]) + } + + fn on_finish(&mut self, _output: bool) -> Result> { + Ok(match std::mem::take(&mut self.hash_table) { + HashTable::MovedOut => unreachable!(), + HashTable::HashTable(v) => match v.hashtable.len() == 0 { + true => vec![], + false => vec![DataBlock::empty_with_meta( + AggregateMeta::::create_hashtable(-1, v), + )], + }, + HashTable::PartitionedHashTable(v) => { + let cells = PartitionedHashTableDropper::split_cell(v); + let mut blocks = Vec::with_capacity(cells.len()); + for (bucket, cell) in cells.into_iter().enumerate() { + if cell.hashtable.len() != 0 { + blocks.push(DataBlock::empty_with_meta( + AggregateMeta::::create_hashtable(bucket as isize, cell), + )); + } + } + + blocks + } + }) + } +} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_group_by_final.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_group_by_final.rs new file mode 100644 index 0000000000000..70c411347a89c --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_group_by_final.rs @@ -0,0 +1,117 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use common_exception::ErrorCode; +use common_exception::Result; +use common_expression::DataBlock; +use common_hashtable::HashtableEntryRefLike; +use common_hashtable::HashtableLike; +use common_pipeline_core::processors::port::InputPort; +use common_pipeline_core::processors::port::OutputPort; +use common_pipeline_core::processors::processor::ProcessorPtr; +use common_pipeline_transforms::processors::transforms::BlockMetaTransform; +use common_pipeline_transforms::processors::transforms::BlockMetaTransformer; + +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; +use crate::pipelines::processors::transforms::aggregator::estimated_key_size; +use crate::pipelines::processors::transforms::group_by::GroupColumnsBuilder; +use crate::pipelines::processors::transforms::group_by::HashMethodBounds; +use crate::pipelines::processors::transforms::group_by::KeysColumnIter; +use crate::pipelines::processors::AggregatorParams; + +pub struct TransformFinalGroupBy { + method: Method, + params: Arc, +} + +impl TransformFinalGroupBy { + pub fn try_create( + input: Arc, + output: Arc, + method: Method, + params: Arc, + ) -> Result { + Ok(ProcessorPtr::create(BlockMetaTransformer::create( + input, + output, + TransformFinalGroupBy:: { method, params }, + ))) + } +} + +impl BlockMetaTransform> for TransformFinalGroupBy +where Method: HashMethodBounds +{ + const NAME: &'static str = "TransformFinalGroupBy"; + + fn transform(&mut self, meta: AggregateMeta) -> Result { + if let AggregateMeta::Partitioned { bucket, data } = meta { + let mut hashtable = self.method.create_hash_table::<()>()?; + 'merge_hashtable: for bucket_data in data { + match bucket_data { + AggregateMeta::Partitioned { .. } => unreachable!(), + AggregateMeta::Serialized(payload) => { + debug_assert!(bucket == payload.bucket); + let column = payload.get_group_by_column(); + let keys_iter = self.method.keys_iter_from_column(column)?; + + unsafe { + for key in keys_iter.iter() { + let _ = hashtable.insert_and_entry(key); + } + + if let Some(limit) = self.params.limit { + if hashtable.len() >= limit { + break 'merge_hashtable; + } + } + } + } + AggregateMeta::HashTable(payload) => unsafe { + debug_assert!(bucket == payload.bucket); + + for key in payload.cell.hashtable.iter() { + let _ = hashtable.insert_and_entry(key.key()); + } + + if let Some(limit) = self.params.limit { + if hashtable.len() >= limit { + break 'merge_hashtable; + } + } + }, + } + } + + let value_size = estimated_key_size(&hashtable); + let keys_len = hashtable.len(); + + let mut group_columns_builder = + self.method + .group_columns_builder(keys_len, value_size, &self.params); + + for group_entity in hashtable.iter() { + group_columns_builder.append_value(group_entity.key()); + } + + return Ok(DataBlock::new_from_columns(group_columns_builder.finish()?)); + } + + Err(ErrorCode::Internal( + "TransformFinalGroupBy only recv AggregateMeta::Partitioned", + )) + } +} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_group_by_partial.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_group_by_partial.rs new file mode 100644 index 0000000000000..2640eabcb1c56 --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_group_by_partial.rs @@ -0,0 +1,179 @@ +// Copyright 2023 Datafuse Labs. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; +use std::vec; + +use common_catalog::table_context::TableContext; +use common_exception::ErrorCode; +use common_exception::Result; +use common_expression::DataBlock; +use common_hashtable::HashtableLike; +use common_pipeline_core::processors::port::InputPort; +use common_pipeline_core::processors::port::OutputPort; +use common_pipeline_core::processors::Processor; +use common_pipeline_transforms::processors::transforms::AccumulatingTransform; +use common_pipeline_transforms::processors::transforms::AccumulatingTransformer; +use common_sql::IndexType; + +use crate::pipelines::processors::transforms::aggregator::aggregate_cell::GroupByHashTableDropper; +use crate::pipelines::processors::transforms::aggregator::aggregate_cell::HashTableCell; +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; +use crate::pipelines::processors::transforms::group_by::HashMethodBounds; +use crate::pipelines::processors::transforms::group_by::PartitionedHashMethod; +use crate::pipelines::processors::transforms::PartitionedHashTableDropper; +use crate::pipelines::processors::AggregatorParams; +use crate::sessions::QueryContext; + +#[allow(clippy::enum_variant_names)] +enum HashTable { + MovedOut, + HashTable(HashTableCell), + PartitionedHashTable(HashTableCell, ()>), +} + +impl Default for HashTable { + fn default() -> Self { + Self::MovedOut + } +} + +struct GroupBySettings { + convert_threshold: usize, + spilling_bytes_threshold_per_proc: usize, +} + +impl TryFrom> for GroupBySettings { + type Error = ErrorCode; + + fn try_from(ctx: Arc) -> std::result::Result { + let settings = ctx.get_settings(); + let convert_threshold = settings.get_group_by_two_level_threshold()? 
as usize; + Ok(GroupBySettings { + convert_threshold, + spilling_bytes_threshold_per_proc: usize::MAX, + }) + } +} + +// SELECT column_name FROM table_name GROUP BY column_name +pub struct TransformPartialGroupBy { + method: Method, + hash_table: HashTable, + group_columns: Vec, + settings: GroupBySettings, +} + +impl TransformPartialGroupBy { + pub fn try_create( + ctx: Arc, + method: Method, + input: Arc, + output: Arc, + params: Arc, + ) -> Result> { + let hashtable = method.create_hash_table()?; + let _dropper = GroupByHashTableDropper::::create(); + let hash_table = HashTable::HashTable(HashTableCell::create(hashtable, _dropper)); + + Ok(AccumulatingTransformer::create( + input, + output, + TransformPartialGroupBy:: { + method, + hash_table, + group_columns: params.group_columns.clone(), + settings: GroupBySettings::try_from(ctx)?, + }, + )) + } +} + +impl AccumulatingTransform for TransformPartialGroupBy { + const NAME: &'static str = "TransformPartialGroupBy"; + + fn transform(&mut self, block: DataBlock) -> Result> { + let block = block.convert_to_full(); + let group_columns = self + .group_columns + .iter() + .map(|&index| block.get_by_offset(index)) + .collect::>(); + + let group_columns = group_columns + .iter() + .map(|c| (c.value.as_column().unwrap().clone(), c.data_type.clone())) + .collect::>(); + + unsafe { + let rows_num = block.num_rows(); + let state = self.method.build_keys_state(&group_columns, rows_num)?; + + match &mut self.hash_table { + HashTable::MovedOut => unreachable!(), + HashTable::HashTable(cell) => { + for key in self.method.build_keys_iter(&state)? { + let _ = cell.hashtable.insert_and_entry(key); + } + } + HashTable::PartitionedHashTable(cell) => { + for key in self.method.build_keys_iter(&state)? { + let _ = cell.hashtable.insert_and_entry(key); + } + } + }; + + #[allow(clippy::collapsible_if)] + if Method::SUPPORT_PARTITIONED { + if matches!(&self.hash_table, HashTable::HashTable(cell) + if cell.hashtable.len() >= self.settings.convert_threshold || + cell.hashtable.bytes_len() >= self.settings.spilling_bytes_threshold_per_proc + ) { + if let HashTable::HashTable(cell) = std::mem::take(&mut self.hash_table) { + self.hash_table = HashTable::PartitionedHashTable( + PartitionedHashMethod::convert_hashtable(&self.method, cell)?, + ); + } + } + } + } + + Ok(vec![]) + } + + fn on_finish(&mut self, _output: bool) -> Result> { + Ok(match std::mem::take(&mut self.hash_table) { + HashTable::MovedOut => unreachable!(), + HashTable::HashTable(cell) => match cell.hashtable.len() == 0 { + true => vec![], + false => vec![DataBlock::empty_with_meta( + AggregateMeta::::create_hashtable(-1, cell), + )], + }, + HashTable::PartitionedHashTable(v) => { + let cells = PartitionedHashTableDropper::split_cell(v); + let mut blocks = Vec::with_capacity(cells.len()); + for (bucket, cell) in cells.into_iter().enumerate() { + if cell.hashtable.len() != 0 { + blocks.push(DataBlock::empty_with_meta( + AggregateMeta::::create_hashtable(bucket as isize, cell), + )); + } + } + + blocks + } + }) + } +} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_partition_bucket.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_partition_bucket.rs new file mode 100644 index 0000000000000..fd3e3dbe95e9f --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_partition_bucket.rs @@ -0,0 +1,465 @@ +// Copyright 2023 Datafuse Labs. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::any::Any; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::marker::PhantomData; +use std::mem::take; +use std::sync::Arc; + +use common_catalog::table_context::TableContext; +use common_exception::ErrorCode; +use common_exception::Result; +use common_expression::with_hash_method; +use common_expression::BlockMetaInfoDowncast; +use common_expression::DataBlock; +use common_expression::HashMethodKind; +use common_hashtable::hash2bucket; +use common_hashtable::HashtableLike; +use common_pipeline_core::pipe::Pipe; +use common_pipeline_core::pipe::PipeItem; +use common_pipeline_core::processors::port::InputPort; +use common_pipeline_core::processors::port::OutputPort; +use common_pipeline_core::processors::processor::Event; +use common_pipeline_core::processors::processor::ProcessorPtr; +use common_pipeline_core::processors::Processor; +use common_pipeline_core::Pipeline; + +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::HashTablePayload; +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::SerializedPayload; +use crate::pipelines::processors::transforms::aggregator::TransformAggregateDeserializer; +use crate::pipelines::processors::transforms::aggregator::TransformFinalGroupBy; +use crate::pipelines::processors::transforms::group_by::HashMethodBounds; +use crate::pipelines::processors::transforms::group_by::KeysColumnIter; +use crate::pipelines::processors::transforms::group_by::PartitionedHashMethod; +use crate::pipelines::processors::transforms::PartitionedHashTableDropper; +use crate::pipelines::processors::transforms::TransformFinalAggregate; +use crate::pipelines::processors::transforms::TransformGroupByDeserializer; +use crate::pipelines::processors::AggregatorParams; +use crate::sessions::QueryContext; + +static SINGLE_LEVEL_BUCKET_NUM: isize = -1; + +struct InputPortState { + port: Arc, + bucket: isize, +} + +pub struct TransformPartitionBucket { + output: Arc, + inputs: Vec, + + method: Method, + working_bucket: isize, + pushing_bucket: isize, + initialized_all_inputs: bool, + buckets_blocks: BTreeMap>, + unsplitted_blocks: Vec, + _phantom: PhantomData, +} + +impl + TransformPartitionBucket +{ + pub fn create(method: Method, input_nums: usize) -> Result { + let mut inputs = Vec::with_capacity(input_nums); + + for _index in 0..input_nums { + inputs.push(InputPortState { + bucket: -1, + port: InputPort::create(), + }); + } + + Ok(TransformPartitionBucket { + method, + // params, + inputs, + working_bucket: 0, + pushing_bucket: 0, + output: OutputPort::create(), + buckets_blocks: BTreeMap::new(), + unsplitted_blocks: vec![], + initialized_all_inputs: false, + _phantom: Default::default(), + }) + } + + pub fn get_inputs(&self) -> Vec> { + let mut inputs = Vec::with_capacity(self.inputs.len()); + + for input_state in &self.inputs { + 
inputs.push(input_state.port.clone()); + } + + inputs + } + + pub fn get_output(&self) -> Arc { + self.output.clone() + } + + fn initialize_all_inputs(&mut self) -> Result { + self.initialized_all_inputs = true; + + for index in 0..self.inputs.len() { + if self.inputs[index].port.is_finished() { + continue; + } + + // We pull the first unsplitted data block + if self.inputs[index].bucket > SINGLE_LEVEL_BUCKET_NUM { + continue; + } + + if !self.inputs[index].port.has_data() { + self.inputs[index].port.set_need_data(); + self.initialized_all_inputs = false; + continue; + } + + let data_block = self.inputs[index].port.pull_data().unwrap()?; + self.inputs[index].bucket = self.add_bucket(data_block); + + if self.inputs[index].bucket <= SINGLE_LEVEL_BUCKET_NUM { + self.inputs[index].port.set_need_data(); + self.initialized_all_inputs = false; + } + } + + Ok(self.initialized_all_inputs) + } + + fn add_bucket(&mut self, data_block: DataBlock) -> isize { + if let Some(block_meta) = data_block.get_meta() { + if let Some(block_meta) = AggregateMeta::::downcast_ref_from(block_meta) { + let bucket = match block_meta { + AggregateMeta::Partitioned { .. } => unreachable!(), + AggregateMeta::Serialized(payload) => payload.bucket, + AggregateMeta::HashTable(payload) => payload.bucket, + }; + + if bucket > SINGLE_LEVEL_BUCKET_NUM { + match self.buckets_blocks.entry(bucket) { + Entry::Vacant(v) => { + v.insert(vec![data_block]); + } + Entry::Occupied(mut v) => { + v.get_mut().push(data_block); + } + }; + + return bucket; + } + } + } + + self.unsplitted_blocks.push(data_block); + SINGLE_LEVEL_BUCKET_NUM + } + + fn try_push_data_block(&mut self) -> bool { + match self.buckets_blocks.is_empty() { + true => self.try_push_single_level(), + false => self.try_push_two_level(), + } + } + + fn try_push_two_level(&mut self) -> bool { + while self.pushing_bucket < self.working_bucket { + if let Some(bucket_blocks) = self.buckets_blocks.remove(&self.pushing_bucket) { + let data_block = Self::convert_blocks(self.pushing_bucket, bucket_blocks); + self.output.push_data(Ok(data_block)); + self.pushing_bucket += 1; + return true; + } + + self.pushing_bucket += 1; + } + + false + } + + fn try_push_single_level(&mut self) -> bool { + if !self.unsplitted_blocks.is_empty() { + let data_blocks = take(&mut self.unsplitted_blocks); + self.output.push_data(Ok(Self::convert_blocks( + SINGLE_LEVEL_BUCKET_NUM, + data_blocks, + ))); + return true; + } + + false + } + + fn convert_blocks(bucket: isize, data_blocks: Vec) -> DataBlock { + let mut data = Vec::with_capacity(data_blocks.len()); + for mut data_block in data_blocks.into_iter() { + if let Some(block_meta) = data_block.take_meta() { + if let Some(block_meta) = AggregateMeta::::downcast_from(block_meta) { + data.push(block_meta); + } + } + } + + DataBlock::empty_with_meta(AggregateMeta::::create_partitioned(bucket, data)) + } + + fn partition_block(&self, payload: SerializedPayload) -> Result>> { + let column = payload.get_group_by_column(); + let keys_iter = self.method.keys_iter_from_column(column)?; + + let mut indices = Vec::with_capacity(payload.data_block.num_rows()); + + for key_item in keys_iter.iter() { + let hash = self.method.get_hash(key_item); + indices.push(hash2bucket::<8, true>(hash as usize) as u16); + } + + let scatter_blocks = DataBlock::scatter(&payload.data_block, &indices, 1 << 8)?; + + let mut blocks = Vec::with_capacity(scatter_blocks.len()); + for (bucket, data_block) in scatter_blocks.into_iter().enumerate() { + blocks.push(match data_block.is_empty() { + 
true => None, + false => Some(DataBlock::empty_with_meta( + AggregateMeta::::create_serialized(bucket as isize, data_block), + )), + }); + } + + Ok(blocks) + } + + fn partition_hashtable( + &self, + payload: HashTablePayload, + ) -> Result>> { + let temp = PartitionedHashMethod::convert_hashtable(&self.method, payload.cell)?; + let cells = PartitionedHashTableDropper::split_cell(temp); + + let mut data_blocks = Vec::with_capacity(cells.len()); + for (bucket, cell) in cells.into_iter().enumerate() { + data_blocks.push(match cell.hashtable.len() == 0 { + true => None, + false => Some(DataBlock::empty_with_meta( + AggregateMeta::::create_hashtable(bucket as isize, cell), + )), + }) + } + + Ok(data_blocks) + } +} + +#[async_trait::async_trait] +impl Processor + for TransformPartitionBucket +{ + fn name(&self) -> String { + String::from("TransformPartitionBucket") + } + + fn as_any(&mut self) -> &mut dyn Any { + self + } + + fn event(&mut self) -> Result { + if self.output.is_finished() { + for input_state in &self.inputs { + input_state.port.finish(); + } + + self.buckets_blocks.clear(); + return Ok(Event::Finished); + } + + // We pull the first unsplitted data block + if !self.initialized_all_inputs && !self.initialize_all_inputs()? { + return Ok(Event::NeedData); + } + + if !self.buckets_blocks.is_empty() && !self.unsplitted_blocks.is_empty() { + // Split data blocks if it's unsplitted. + return Ok(Event::Sync); + } + + if !self.output.can_push() { + for input_state in &self.inputs { + input_state.port.set_not_need_data(); + } + + return Ok(Event::NeedConsume); + } + + let pushed_data_block = self.try_push_data_block(); + + loop { + // Try to pull the next data or until the port is closed + let mut all_inputs_is_finished = true; + let mut all_port_prepared_data = true; + + for index in 0..self.inputs.len() { + if self.inputs[index].port.is_finished() { + continue; + } + + all_inputs_is_finished = false; + if self.inputs[index].bucket > self.working_bucket { + continue; + } + + if !self.inputs[index].port.has_data() { + all_port_prepared_data = false; + self.inputs[index].port.set_need_data(); + continue; + } + + let data_block = self.inputs[index].port.pull_data().unwrap()?; + self.inputs[index].bucket = self.add_bucket(data_block); + debug_assert!(self.unsplitted_blocks.is_empty()); + + if self.inputs[index].bucket <= self.working_bucket { + all_port_prepared_data = false; + self.inputs[index].port.set_need_data(); + } + } + + if all_inputs_is_finished { + break; + } + + if !all_port_prepared_data { + return Ok(Event::NeedData); + } + + self.working_bucket += 1; + } + + if pushed_data_block || self.try_push_data_block() { + return Ok(Event::NeedConsume); + } + + if let Some((bucket, bucket_blocks)) = self.buckets_blocks.pop_first() { + let data_block = Self::convert_blocks(bucket, bucket_blocks); + self.output.push_data(Ok(data_block)); + return Ok(Event::NeedConsume); + } + + self.output.finish(); + Ok(Event::Finished) + } + + fn process(&mut self) -> Result<()> { + let block_meta = self + .unsplitted_blocks + .pop() + .and_then(|mut block| block.take_meta()) + .and_then(AggregateMeta::::downcast_from); + + match block_meta { + None => Err(ErrorCode::Internal( + "Internal error, TransformPartitionBucket only recv AggregateMeta.", + )), + Some(agg_block_meta) => { + let data_blocks = match agg_block_meta { + AggregateMeta::Partitioned { .. 
} => unreachable!(), + AggregateMeta::Serialized(payload) => self.partition_block(payload)?, + AggregateMeta::HashTable(payload) => self.partition_hashtable(payload)?, + }; + + for (bucket, block) in data_blocks.into_iter().enumerate() { + if let Some(data_block) = block { + match self.buckets_blocks.entry(bucket as isize) { + Entry::Vacant(v) => { + v.insert(vec![data_block]); + } + Entry::Occupied(mut v) => { + v.get_mut().push(data_block); + } + }; + } + } + + Ok(()) + } + } + } +} + +fn build_partition_bucket( + ctx: &Arc, + method: Method, + pipeline: &mut Pipeline, + params: Arc, +) -> Result<()> { + if !ctx.get_cluster().is_empty() { + pipeline.add_transform( + |input, output| match params.aggregate_functions.is_empty() { + true => TransformGroupByDeserializer::::try_create(input, output), + false => TransformAggregateDeserializer::::try_create(input, output), + }, + )?; + } + + let input_nums = pipeline.output_len(); + let transform = TransformPartitionBucket::::create(method.clone(), input_nums)?; + + let output = transform.get_output(); + let inputs_port = transform.get_inputs(); + + pipeline.add_pipe(Pipe::create(inputs_port.len(), 1, vec![PipeItem::create( + ProcessorPtr::create(Box::new(transform)), + inputs_port, + vec![output], + )])); + + pipeline.resize(input_nums)?; + + pipeline.add_transform( + |input, output| match params.aggregate_functions.is_empty() { + true => { + TransformFinalGroupBy::try_create(input, output, method.clone(), params.clone()) + } + false => { + TransformFinalAggregate::try_create(input, output, method.clone(), params.clone()) + } + }, + ) +} + +pub fn efficiently_memory_final_aggregator( + ctx: &Arc, + params: Arc, + pipeline: &mut Pipeline, +) -> Result<()> { + let group_cols = ¶ms.group_columns; + let schema_before_group_by = params.input_schema.clone(); + let sample_block = DataBlock::empty_with_schema(schema_before_group_by); + let method = DataBlock::choose_hash_method(&sample_block, group_cols)?; + + match params.aggregate_functions.is_empty() { + true => with_hash_method!(|T| match method { + HashMethodKind::T(v) => + build_partition_bucket::<_, ()>(ctx, v, pipeline, params.clone()), + }), + false => with_hash_method!(|T| match method { + HashMethodKind::T(v) => + build_partition_bucket::<_, usize>(ctx, v, pipeline, params.clone()), + }), + } +} diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_single_key.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_single_key.rs similarity index 93% rename from src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_single_key.rs rename to src/query/service/src/pipelines/processors/transforms/aggregator/transform_single_key.rs index 20f794bb322b7..a2adf0dbe8aa5 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregator_single_key.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_single_key.rs @@ -83,7 +83,7 @@ impl PartialSingleStateAggregator { impl AccumulatingTransform for PartialSingleStateAggregator { const NAME: &'static str = "AggregatorPartialTransform"; - fn transform(&mut self, block: DataBlock) -> Result> { + fn transform(&mut self, block: DataBlock) -> Result> { let block = block.convert_to_full(); for (idx, func) in self.funcs.iter().enumerate() { @@ -102,11 +102,11 @@ impl AccumulatingTransform for PartialSingleStateAggregator { func.accumulate(place, &arg_columns, None, block.num_rows())?; } - Ok(None) + Ok(vec![]) } - fn on_finish(&mut 
self, generate_data: bool) -> Result> { - let mut generate_data_block = None; + fn on_finish(&mut self, generate_data: bool) -> Result> { + let mut generate_data_block = vec![]; if generate_data { let mut columns = Vec::with_capacity(self.funcs.len()); @@ -122,7 +122,7 @@ impl AccumulatingTransform for PartialSingleStateAggregator { }); } - generate_data_block = Some(DataBlock::new(columns, 1)); + generate_data_block = vec![DataBlock::new(columns, 1)]; } // destroy states @@ -188,7 +188,7 @@ impl FinalSingleStateAggregator { impl AccumulatingTransform for FinalSingleStateAggregator { const NAME: &'static str = "AggregatorFinalTransform"; - fn transform(&mut self, block: DataBlock) -> Result> { + fn transform(&mut self, block: DataBlock) -> Result> { if !block.is_empty() { let block = block.convert_to_full(); let places = self.new_places(); @@ -205,11 +205,11 @@ impl AccumulatingTransform for FinalSingleStateAggregator { } } - Ok(None) + Ok(vec![]) } - fn on_finish(&mut self, generate_data: bool) -> Result> { - let mut generate_data_block = None; + fn on_finish(&mut self, generate_data: bool) -> Result> { + let mut generate_data_block = vec![]; if generate_data { let mut aggr_values = { @@ -245,7 +245,7 @@ impl AccumulatingTransform for FinalSingleStateAggregator { } } - generate_data_block = Some(DataBlock::new_from_columns(columns)); + generate_data_block = vec![DataBlock::new_from_columns(columns)]; } for (places, func) in self.to_merge_places.iter().zip(self.funcs.iter()) { diff --git a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_keys_builder.rs b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_keys_builder.rs index 74c9077793d80..06beada338d45 100644 --- a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_keys_builder.rs +++ b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_keys_builder.rs @@ -14,11 +14,14 @@ use std::marker::PhantomData; +use common_arrow::arrow::buffer::Buffer; +use common_expression::types::decimal::Decimal; use common_expression::types::number::Number; use common_expression::types::string::StringColumnBuilder; use common_expression::types::NumberType; use common_expression::types::ValueType; use common_expression::Column; +use ethnum::i256; use super::large_number::LargeNumber; @@ -82,8 +85,8 @@ impl<'a> KeysColumnBuilder for StringKeysColumnBuilder<'a> { } pub struct LargeFixedKeysColumnBuilder<'a, T: LargeNumber> { - pub values: Vec, - pub _t: PhantomData<&'a T>, + pub _t: PhantomData<&'a ()>, + pub values: Vec, } impl<'a, T: LargeNumber> KeysColumnBuilder for LargeFixedKeysColumnBuilder<'a, T> { @@ -91,18 +94,25 @@ impl<'a, T: LargeNumber> KeysColumnBuilder for LargeFixedKeysColumnBuilder<'a, T #[inline] fn append_value(&mut self, v: Self::T) { - let values = &mut self.values; - values.reserve(T::BYTE_SIZE); - let len = values.len(); - unsafe { values.set_len(len + T::BYTE_SIZE) } - v.serialize_to(&mut values[len..len + T::BYTE_SIZE]); + self.values.push(*v); } #[inline] fn finish(self) -> Column { - let len = self.values.len() / T::BYTE_SIZE; - let offsets = (0..=len).map(|x| (x * T::BYTE_SIZE) as u64).collect(); - let builder = StringColumnBuilder::from_data(self.values, offsets); - Column::String(builder.build()) + match T::BYTE_SIZE { + 16 => { + let values: Buffer = self.values.into(); + let values: Buffer = unsafe { std::mem::transmute(values) }; + let col = i128::to_column_from_buffer(values, i128::default_decimal_size()); + Column::Decimal(col) + } + 
32 => { + let values: Buffer = self.values.into(); + let values: Buffer = unsafe { std::mem::transmute(values) }; + let col = i256::to_column_from_buffer(values, i256::default_decimal_size()); + Column::Decimal(col) + } + _ => unreachable!(), + } } } diff --git a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_keys_iter.rs b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_keys_iter.rs index 0254519e6d360..56f7a2237df61 100644 --- a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_keys_iter.rs +++ b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_keys_iter.rs @@ -52,18 +52,12 @@ impl KeysColumnIter for FixedKeysColumnIter { } pub struct LargeFixedKeysColumnIter { - holder: Vec, + holder: Buffer, } impl LargeFixedKeysColumnIter { - pub fn create(inner: &StringColumn) -> Result { - let mut array = Vec::with_capacity(inner.len()); - - for bs in inner.iter() { - array.push(T::from_bytes(bs)?); - } - - Ok(Self { holder: array }) + pub fn create(holder: Buffer) -> Result { + Ok(Self { holder }) } } diff --git a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_polymorphic_keys.rs b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_polymorphic_keys.rs index 71082ec5cbfcd..87cf36a6f5eac 100644 --- a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_polymorphic_keys.rs +++ b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_polymorphic_keys.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::marker::PhantomData; +use std::time::Instant; use common_exception::ErrorCode; use common_exception::Result; @@ -24,19 +25,20 @@ use common_expression::HashMethod; use common_expression::HashMethodFixedKeys; use common_expression::HashMethodKeysU128; use common_expression::HashMethodKeysU256; -use common_expression::HashMethodKeysU512; use common_expression::HashMethodSerializer; use common_expression::HashMethodSingleString; use common_expression::KeysState; use common_hashtable::FastHash; use common_hashtable::HashMap; +use common_hashtable::HashtableEntryMutRefLike; +use common_hashtable::HashtableEntryRefLike; use common_hashtable::HashtableLike; use common_hashtable::LookupHashMap; use common_hashtable::PartitionedHashMap; use common_hashtable::ShortStringHashMap; use common_hashtable::StringHashMap; -use primitive_types::U256; -use primitive_types::U512; +use ethnum::U256; +use tracing::info; use super::aggregator_keys_builder::LargeFixedKeysColumnBuilder; use super::aggregator_keys_iter::LargeFixedKeysColumnIter; @@ -50,6 +52,10 @@ use crate::pipelines::processors::transforms::group_by::aggregator_keys_builder: use crate::pipelines::processors::transforms::group_by::aggregator_keys_iter::FixedKeysColumnIter; use crate::pipelines::processors::transforms::group_by::aggregator_keys_iter::KeysColumnIter; use crate::pipelines::processors::transforms::group_by::aggregator_keys_iter::SerializedKeysColumnIter; +use crate::pipelines::processors::transforms::group_by::Area; +use crate::pipelines::processors::transforms::group_by::ArenaHolder; +use crate::pipelines::processors::transforms::HashTableCell; +use crate::pipelines::processors::transforms::PartitionedHashTableDropper; use crate::pipelines::processors::AggregatorParams; // Provide functions for all HashMethod to help implement polymorphic group by key @@ -293,11 +299,16 @@ impl PolymorphicKeysHelper for HashMethodKeysU128 { type KeysColumnIter = 
LargeFixedKeysColumnIter; fn keys_iter_from_column(&self, column: &Column) -> Result { - LargeFixedKeysColumnIter::create(column.as_string().ok_or_else(|| { - ErrorCode::IllegalDataType( - "Illegal data type for LargeFixedKeysColumnIter".to_string(), - ) - })?) + let buffer = column + .as_decimal() + .and_then(|c| c.as_decimal128()) + .ok_or_else(|| { + ErrorCode::IllegalDataType( + "Illegal data type for LargeFixedKeysColumnIter".to_string(), + ) + })?; + let buffer = unsafe { std::mem::transmute(buffer.0.clone()) }; + LargeFixedKeysColumnIter::create(buffer) } type GroupColumnsBuilder<'a> = FixedKeysGroupColumnsBuilder<'a, u128>; @@ -334,11 +345,17 @@ impl PolymorphicKeysHelper for HashMethodKeysU256 { type KeysColumnIter = LargeFixedKeysColumnIter; fn keys_iter_from_column(&self, column: &Column) -> Result { - LargeFixedKeysColumnIter::create(column.as_string().ok_or_else(|| { - ErrorCode::IllegalDataType( - "Illegal data type for LargeFixedKeysColumnIter".to_string(), - ) - })?) + let buffer = column + .as_decimal() + .and_then(|c| c.as_decimal256()) + .ok_or_else(|| { + ErrorCode::IllegalDataType( + "Illegal data type for LargeFixedKeysColumnIter".to_string(), + ) + })?; + let buffer = unsafe { std::mem::transmute(buffer.0.clone()) }; + + LargeFixedKeysColumnIter::create(buffer) } type GroupColumnsBuilder<'a> = FixedKeysGroupColumnsBuilder<'a, U256>; @@ -356,47 +373,6 @@ impl PolymorphicKeysHelper for HashMethodKeysU256 { } } -impl PolymorphicKeysHelper for HashMethodKeysU512 { - const SUPPORT_PARTITIONED: bool = true; - - type HashTable = HashMap; - - fn create_hash_table(&self) -> Result> { - Ok(HashMap::new()) - } - - type ColumnBuilder<'a> = LargeFixedKeysColumnBuilder<'a, U512>; - fn keys_column_builder(&self, capacity: usize, _: usize) -> LargeFixedKeysColumnBuilder { - LargeFixedKeysColumnBuilder { - _t: PhantomData::default(), - values: Vec::with_capacity(capacity * 64), - } - } - - type KeysColumnIter = LargeFixedKeysColumnIter; - fn keys_iter_from_column(&self, column: &Column) -> Result { - LargeFixedKeysColumnIter::create(column.as_string().ok_or_else(|| { - ErrorCode::IllegalDataType( - "Illegal data type for LargeFixedKeysColumnIter".to_string(), - ) - })?) 
- } - - type GroupColumnsBuilder<'a> = FixedKeysGroupColumnsBuilder<'a, U512>; - fn group_columns_builder( - &self, - capacity: usize, - _data_capacity: usize, - params: &AggregatorParams, - ) -> FixedKeysGroupColumnsBuilder { - FixedKeysGroupColumnsBuilder::create(capacity, params) - } - - fn get_hash(&self, v: &U512) -> u64 { - v.fast_hash() - } -} - impl PolymorphicKeysHelper for HashMethodSingleString { const SUPPORT_PARTITIONED: bool = true; @@ -478,17 +454,66 @@ impl PolymorphicKeysHelper for HashMethodSerializer { } #[derive(Clone)] -pub struct PartitionedHashMethod { +pub struct PartitionedHashMethod { pub(crate) method: Method, } -impl PartitionedHashMethod { +impl PartitionedHashMethod { pub fn create(method: Method) -> PartitionedHashMethod { PartitionedHashMethod:: { method } } + + pub fn convert_hashtable( + method: &Method, + mut cell: HashTableCell, + ) -> Result, T>> + where + T: Copy + Send + Sync + 'static, + Self: PolymorphicKeysHelper>, + { + let instant = Instant::now(); + let partitioned_method = Self::create(method.clone()); + let mut partitioned_hashtable = partitioned_method.create_hash_table()?; + + unsafe { + for item in cell.hashtable.iter() { + match partitioned_hashtable.insert_and_entry(item.key()) { + Ok(mut entry) => { + *entry.get_mut() = *item.get(); + } + Err(mut entry) => { + *entry.get_mut() = *item.get(); + } + }; + } + } + + info!( + "Convert to Partitioned HashTable elapsed: {:?}", + instant.elapsed() + ); + + let arena = std::mem::replace(&mut cell.arena, Area::create()); + cell.arena_holders.push(ArenaHolder::create(Some(arena))); + let temp_values = cell.temp_values.to_vec(); + let arena_holders = cell.arena_holders.to_vec(); + + let _old_dropper = cell._dropper.clone().unwrap(); + let _new_dropper = PartitionedHashTableDropper::::create(_old_dropper); + + // TODO(winter): No idea(may memory leak). + // We need to ensure that the following two lines of code are atomic. 
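+ // Either ordering of the two steps below has a hazard: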
+ // take_old_dropper before create new HashTableCell - may memory leak + // create new HashTableCell before take_old_dropper - may double free memory + let _old_dropper = cell._dropper.take(); + let mut cell = HashTableCell::create(partitioned_hashtable, _new_dropper); + cell.temp_values = temp_values; + cell.arena_holders = arena_holders; + Ok(cell) + } } -impl HashMethod for PartitionedHashMethod { +impl HashMethod for PartitionedHashMethod { type HashKey = Method::HashKey; type HashKeyIter<'a> = Method::HashKeyIter<'a> where Self: 'a; diff --git a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_state.rs b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_state.rs index cf94f65c35284..a61a7b05565ba 100644 --- a/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/group_by/aggregator_state.rs @@ -42,7 +42,6 @@ pub struct ArenaHolder { impl ArenaHolder { pub fn create(area: Option) -> ArenaHolder { - tracing::info!("Putting one arena into holder"); ArenaHolder { _data: Arc::new(area), } @@ -56,4 +55,5 @@ impl Debug for ArenaHolder { } unsafe impl Send for ArenaHolder {} + unsafe impl Sync for ArenaHolder {} diff --git a/src/query/service/src/pipelines/processors/transforms/group_by/large_number.rs b/src/query/service/src/pipelines/processors/transforms/group_by/large_number.rs index f988a8c84a15b..bcb19769d4d84 100644 --- a/src/query/service/src/pipelines/processors/transforms/group_by/large_number.rs +++ b/src/query/service/src/pipelines/processors/transforms/group_by/large_number.rs @@ -12,53 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_exception::ErrorCode; -use common_exception::Result; -use primitive_types::U256; -use primitive_types::U512; +use ethnum::U256; -pub trait LargeNumber: Default + Sized + 'static { +pub trait LargeNumber: Default + Copy + Sized + 'static { const BYTE_SIZE: usize; - fn serialize_to(&self, _bytes: &mut [u8]); - fn from_bytes(v: &[u8]) -> Result; } impl LargeNumber for u128 { const BYTE_SIZE: usize = 16; - fn serialize_to(&self, bytes: &mut [u8]) { - let bs = self.to_le_bytes(); - bytes.copy_from_slice(&bs); - } - - fn from_bytes(v: &[u8]) -> Result { - let bs: [u8; 16] = v.try_into().map_err(|_| { - ErrorCode::StrParseError(format!( - "Unable to parse into u128, unexpected byte size: {}", - v.len() - )) - })?; - Ok(u128::from_le_bytes(bs)) - } } impl LargeNumber for U256 { const BYTE_SIZE: usize = 32; - fn serialize_to(&self, bytes: &mut [u8]) { - self.to_little_endian(bytes); - } - - fn from_bytes(v: &[u8]) -> Result { - Ok(U256::from_little_endian(v)) - } -} - -impl LargeNumber for U512 { - const BYTE_SIZE: usize = 64; - fn serialize_to(&self, bytes: &mut [u8]) { - self.to_little_endian(bytes); - } - - fn from_bytes(v: &[u8]) -> Result { - Ok(U512::from_little_endian(v)) - } } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_state_impl.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_state_impl.rs index 44cb3b89e32fb..0fdec8331d9fb 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_state_impl.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_state_impl.rs @@ -216,9 +216,6 @@ impl HashJoinState for JoinHashTable { HashTable::KeysU256(table) => insert_key! 
{ &mut table.hash_table, &markers, &table.hash_method,chunk,columns,chunk_index, }, - HashTable::KeysU512(table) => insert_key! { - &mut table.hash_table, &markers, &table.hash_method,chunk,columns,chunk_index, - }, } } Ok(()) diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs index 7755800b798c9..49b9f14d13187 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs @@ -37,9 +37,8 @@ use common_hashtable::HashtableKeyable; use common_hashtable::ShortStringHashMap; use common_hashtable::StringHashMap; use common_sql::plans::JoinType; +use ethnum::U256; use parking_lot::RwLock; -use primitive_types::U256; -use primitive_types::U512; use super::ProbeState; use crate::pipelines::processors::transforms::hash_join::desc::HashJoinDesc; @@ -74,7 +73,6 @@ pub enum HashTable { KeysU64(FixedKeyHashTable), KeysU128(FixedKeyHashTable), KeysU256(FixedKeyHashTable), - KeysU512(FixedKeyHashTable), } pub struct JoinHashTable { @@ -186,16 +184,6 @@ impl JoinHashTable { probe_schema, hash_join_desc, )?), - HashMethodKind::KeysU512(hash_method) => Arc::new(JoinHashTable::try_create( - ctx, - HashTable::KeysU512(FixedKeyHashTable { - hash_table: HashMap::>::new(), - hash_method, - }), - build_schema, - probe_schema, - hash_join_desc, - )?), }) } diff --git a/src/query/service/src/pipelines/processors/transforms/mod.rs b/src/query/service/src/pipelines/processors/transforms/mod.rs index d46dae370c9af..f414092c71723 100644 --- a/src/query/service/src/pipelines/processors/transforms/mod.rs +++ b/src/query/service/src/pipelines/processors/transforms/mod.rs @@ -15,7 +15,6 @@ mod aggregator; pub mod group_by; pub(crate) mod hash_join; -mod transform_aggregator; mod transform_cast_schema; mod transform_create_sets; mod transform_dummy; @@ -26,16 +25,24 @@ mod transform_mark_join; mod profile_wrapper; mod transform_add_const_columns; -mod transform_convert_grouping; mod transform_merge_block; mod transform_resort_addon; mod transform_right_join; mod transform_right_semi_anti_join; +pub use aggregator::efficiently_memory_final_aggregator; pub use aggregator::AggregateExchangeSorting; pub use aggregator::AggregatorParams; -pub use aggregator::AggregatorTransformParams; pub use aggregator::FinalSingleStateAggregator; +pub use aggregator::HashTableCell; +pub use aggregator::PartialSingleStateAggregator; +pub use aggregator::PartitionedHashTableDropper; +pub use aggregator::TransformAggregateSerializer; +pub use aggregator::TransformFinalAggregate; +pub use aggregator::TransformGroupByDeserializer; +pub use aggregator::TransformGroupBySerializer; +pub use aggregator::TransformPartialAggregate; +pub use aggregator::TransformPartialGroupBy; use common_pipeline_transforms::processors::transforms::transform; use common_pipeline_transforms::processors::transforms::transform_block_compact; use common_pipeline_transforms::processors::transforms::transform_compact; @@ -49,14 +56,11 @@ pub use hash_join::JoinHashTable; pub use hash_join::SerializerHashTable; pub use profile_wrapper::ProfileWrapper; pub use transform_add_const_columns::TransformAddConstColumns; -pub use transform_aggregator::TransformAggregator; pub use transform_block_compact::BlockCompactor; pub use transform_block_compact::TransformBlockCompact; pub use transform_cast_schema::TransformCastSchema; pub use 
transform_compact::Compactor; pub use transform_compact::TransformCompact; -pub use transform_convert_grouping::efficiently_memory_final_aggregator; -pub use transform_convert_grouping::TransformConvertGrouping; pub use transform_create_sets::SubqueryReceiver; pub use transform_create_sets::TransformCreateSets; pub use transform_dummy::create_dummy_item; diff --git a/src/query/service/src/pipelines/processors/transforms/transform_aggregator.rs b/src/query/service/src/pipelines/processors/transforms/transform_aggregator.rs deleted file mode 100644 index b54c3b848f7e1..0000000000000 --- a/src/query/service/src/pipelines/processors/transforms/transform_aggregator.rs +++ /dev/null @@ -1,387 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::any::Any; -use std::sync::Arc; - -use common_catalog::table_context::TableContext; -use common_exception::ErrorCode; -use common_exception::Result; -use common_expression::*; - -use crate::pipelines::processors::port::InputPort; -use crate::pipelines::processors::port::OutputPort; -use crate::pipelines::processors::processor::Event; -use crate::pipelines::processors::transforms::aggregator::*; -use crate::pipelines::processors::AggregatorTransformParams; -use crate::pipelines::processors::Processor; -use crate::sessions::QueryContext; - -pub struct TransformAggregator; - -impl TransformAggregator { - pub fn try_create_partial( - transform_params: AggregatorTransformParams, - ctx: Arc, - pass_state_to_final: bool, - ) -> Result> { - let aggregator_params = transform_params.aggregator_params.clone(); - - if aggregator_params.group_columns.is_empty() { - return PartialSingleStateAggregator::try_create( - transform_params.transform_input_port, - transform_params.transform_output_port, - &transform_params.aggregator_params, - ); - } - - match aggregator_params.aggregate_functions.is_empty() { - true => with_mappedhash_method!(|T| match transform_params.method.clone() { - HashMethodKind::T(method) => AggregatorTransform::create( - ctx, - transform_params, - PartialAggregator::::create( - method, - aggregator_params, - pass_state_to_final, - )?, - ), - }), - false => with_mappedhash_method!(|T| match transform_params.method.clone() { - HashMethodKind::T(method) => AggregatorTransform::create( - ctx, - transform_params, - PartialAggregator::::create( - method, - aggregator_params, - pass_state_to_final, - )?, - ), - }), - } - } -} - -pub trait Aggregator: Sized + Send { - const NAME: &'static str; - - fn consume(&mut self, data: DataBlock) -> Result<()>; - // Generate could be called multiple times util it returns empty. 
- fn generate(&mut self) -> Result>; -} - -enum AggregatorTransform { - ConsumeData(ConsumeState), - PartitionedConsumeData(PartitionedConsumeState), - Generate(GenerateState), - PartitionedGenerate(GenerateState>), - Finished, -} - -impl - AggregatorTransform -{ - pub fn create( - ctx: Arc, - transform_params: AggregatorTransformParams, - inner: TAggregator, - ) -> Result> { - let settings = ctx.get_settings(); - let two_level_threshold = settings.get_group_by_two_level_threshold()? as usize; - - let transformer = AggregatorTransform::::ConsumeData(ConsumeState { - inner, - input_port: transform_params.transform_input_port, - output_port: transform_params.transform_output_port, - two_level_threshold, - input_data_block: None, - }); - - if TAggregator::SUPPORT_PARTITION - && transform_params.aggregator_params.has_distinct_combinator() - { - Ok(Box::new(transformer.convert_to_two_level_consume()?)) - } else { - Ok(Box::new(transformer)) - } - } - - pub fn convert_to_generate(self) -> Result { - match self { - AggregatorTransform::ConsumeData(s) => { - Ok(AggregatorTransform::Generate(GenerateState { - inner: s.inner, - is_generated: false, - output_port: s.output_port, - output_data_block: vec![], - })) - } - AggregatorTransform::PartitionedConsumeData(s) => { - Ok(AggregatorTransform::PartitionedGenerate(GenerateState { - inner: s.inner, - is_generated: false, - output_port: s.output_port, - output_data_block: vec![], - })) - } - _ => Err(ErrorCode::Internal("")), - } - } - - pub fn convert_to_two_level_consume(self) -> Result { - match self { - AggregatorTransform::ConsumeData(s) => Ok(AggregatorTransform::PartitionedConsumeData( - PartitionedConsumeState { - inner: s.inner.convert_partitioned()?, - input_port: s.input_port, - output_port: s.output_port, - input_data_block: None, - }, - )), - _ => Err(ErrorCode::Internal("")), - } - } -} - -impl Processor - for AggregatorTransform -{ - fn name(&self) -> String { - TAggregator::NAME.to_string() - } - - fn as_any(&mut self) -> &mut dyn Any { - self - } - - fn event(&mut self) -> Result { - match self { - AggregatorTransform::Finished => Ok(Event::Finished), - AggregatorTransform::Generate(_) => self.generate_event(), - AggregatorTransform::ConsumeData(_) => self.consume_event(), - AggregatorTransform::PartitionedConsumeData(_) => self.consume_event(), - AggregatorTransform::PartitionedGenerate(_) => self.generate_event(), - } - } - - fn process(&mut self) -> Result<()> { - match self { - AggregatorTransform::Finished => Ok(()), - AggregatorTransform::ConsumeData(state) => state.consume(), - AggregatorTransform::Generate(state) => state.generate(), - AggregatorTransform::PartitionedConsumeData(state) => state.consume(), - AggregatorTransform::PartitionedGenerate(state) => state.generate(), - } - } -} - -impl - AggregatorTransform -{ - #[inline(always)] - fn consume_event(&mut self) -> Result { - if let AggregatorTransform::ConsumeData(state) = self { - if TAggregator::SUPPORT_PARTITION { - let cardinality = state.inner.get_state_cardinality(); - - static TWOL_LEVEL_BYTES_THRESHOLD: usize = 5_000_000; - - if cardinality >= state.two_level_threshold - || state.inner.get_state_bytes() >= TWOL_LEVEL_BYTES_THRESHOLD - { - let mut temp_state = AggregatorTransform::Finished; - std::mem::swap(self, &mut temp_state); - temp_state = temp_state.convert_to_two_level_consume()?; - std::mem::swap(self, &mut temp_state); - debug_assert!(matches!(temp_state, AggregatorTransform::Finished)); - return Ok(Event::Sync); - } - } - - if 
state.input_data_block.is_some() { - return Ok(Event::Sync); - } - - if state.input_port.is_finished() { - let mut temp_state = AggregatorTransform::Finished; - std::mem::swap(self, &mut temp_state); - temp_state = temp_state.convert_to_generate()?; - std::mem::swap(self, &mut temp_state); - debug_assert!(matches!(temp_state, AggregatorTransform::Finished)); - return Ok(Event::Sync); - } - - return match state.input_port.has_data() { - true => { - state.input_data_block = Some(state.input_port.pull_data().unwrap()?); - Ok(Event::Sync) - } - false => { - state.input_port.set_need_data(); - Ok(Event::NeedData) - } - }; - } - - if let AggregatorTransform::PartitionedConsumeData(state) = self { - if state.input_data_block.is_some() { - return Ok(Event::Sync); - } - - if state.input_port.is_finished() { - let mut temp_state = AggregatorTransform::Finished; - std::mem::swap(self, &mut temp_state); - temp_state = temp_state.convert_to_generate()?; - std::mem::swap(self, &mut temp_state); - debug_assert!(matches!(temp_state, AggregatorTransform::Finished)); - return Ok(Event::Sync); - } - - return match state.input_port.has_data() { - true => { - state.input_data_block = Some(state.input_port.pull_data().unwrap()?); - Ok(Event::Sync) - } - false => { - state.input_port.set_need_data(); - Ok(Event::NeedData) - } - }; - } - - Err(ErrorCode::Internal("It's a bug")) - } - - #[inline(always)] - fn generate_event(&mut self) -> Result { - if let AggregatorTransform::Generate(state) = self { - if state.output_port.is_finished() { - let mut temp_state = AggregatorTransform::Finished; - std::mem::swap(self, &mut temp_state); - return Ok(Event::Finished); - } - - if !state.output_port.can_push() { - return Ok(Event::NeedConsume); - } - - if let Some(block) = state.output_data_block.pop() { - state.output_port.push_data(Ok(block)); - return Ok(Event::NeedConsume); - } - - if state.is_generated { - if !state.output_port.is_finished() { - state.output_port.finish(); - } - - let mut temp_state = AggregatorTransform::Finished; - std::mem::swap(self, &mut temp_state); - return Ok(Event::Finished); - } - - return Ok(Event::Sync); - } - - if let AggregatorTransform::PartitionedGenerate(state) = self { - if state.output_port.is_finished() { - let mut temp_state = AggregatorTransform::Finished; - std::mem::swap(self, &mut temp_state); - return Ok(Event::Finished); - } - - if !state.output_port.can_push() { - return Ok(Event::NeedConsume); - } - - if !state.output_data_block.is_empty() { - let block = state.output_data_block.remove(0); - state.output_port.push_data(Ok(block)); - return Ok(Event::NeedConsume); - } - - if state.is_generated { - if !state.output_port.is_finished() { - state.output_port.finish(); - } - - let mut temp_state = AggregatorTransform::Finished; - std::mem::swap(self, &mut temp_state); - return Ok(Event::Finished); - } - - return Ok(Event::Sync); - } - - Err(ErrorCode::Internal("It's a bug")) - } -} - -struct ConsumeState { - inner: TAggregator, - two_level_threshold: usize, - - input_port: Arc, - output_port: Arc, - input_data_block: Option, -} - -impl ConsumeState { - pub fn consume(&mut self) -> Result<()> { - if let Some(input_data) = self.input_data_block.take() { - self.inner.consume(input_data)?; - } - - Ok(()) - } -} - -struct PartitionedConsumeState { - inner: PartitionedAggregator, - - input_port: Arc, - output_port: Arc, - input_data_block: Option, -} - -impl PartitionedConsumeState { - pub fn consume(&mut self) -> Result<()> { - if let Some(input_data) = self.input_data_block.take() { 
- self.inner.consume(input_data)?; - } - - Ok(()) - } -} - -struct GenerateState { - inner: TAggregator, - is_generated: bool, - output_port: Arc, - output_data_block: Vec, -} - -impl GenerateState { - pub fn generate(&mut self) -> Result<()> { - if !self.is_generated { - self.output_data_block = self.inner.generate()?; - - // if it's empty, it means the aggregator is finished. - if self.output_data_block.is_empty() { - self.is_generated = true; - } - } - Ok(()) - } -} diff --git a/src/query/service/src/pipelines/processors/transforms/transform_convert_grouping.rs b/src/query/service/src/pipelines/processors/transforms/transform_convert_grouping.rs deleted file mode 100644 index d5d97cd55b4c7..0000000000000 --- a/src/query/service/src/pipelines/processors/transforms/transform_convert_grouping.rs +++ /dev/null @@ -1,562 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::any::Any; -use std::collections::btree_map::Entry; -use std::collections::BTreeMap; -use std::sync::Arc; - -use common_exception::Result; -use common_expression::with_hash_method; -use common_expression::BlockMetaInfo; -use common_expression::BlockMetaInfoPtr; -use common_expression::DataBlock; -use common_expression::HashMethodKind; -use common_hashtable::hash2bucket; -use common_pipeline_core::pipe::Pipe; -use common_pipeline_core::pipe::PipeItem; -use common_pipeline_core::processors::port::InputPort; -use common_pipeline_core::processors::port::OutputPort; -use common_pipeline_core::processors::processor::Event; -use common_pipeline_core::processors::processor::ProcessorPtr; -use common_pipeline_core::processors::Processor; -use common_pipeline_core::Pipeline; -use serde::Deserialize; -use serde::Deserializer; -use serde::Serialize; -use serde::Serializer; - -use super::aggregator::AggregateHashStateInfo; -use super::group_by::BUCKETS_LG2; -use crate::pipelines::processors::transforms::aggregator::AggregateInfo; -use crate::pipelines::processors::transforms::aggregator::BucketAggregator; -use crate::pipelines::processors::transforms::group_by::HashMethodBounds; -use crate::pipelines::processors::transforms::group_by::KeysColumnIter; -use crate::pipelines::processors::AggregatorParams; - -// Overflow to object storage data block -#[allow(dead_code)] -static OVERFLOW_BUCKET_NUM: isize = -2; -// Single level data block -static SINGLE_LEVEL_BUCKET_NUM: isize = -1; - -/// -#[derive(Debug)] -struct ConvertGroupingMetaInfo { - #[allow(dead_code)] - pub bucket: isize, - pub blocks: Vec, -} - -impl Serialize for ConvertGroupingMetaInfo { - fn serialize(&self, _: S) -> Result - where S: Serializer { - unreachable!("ConvertGroupingMetaInfo does not support exchanging between multiple nodes") - } -} - -impl<'de> Deserialize<'de> for ConvertGroupingMetaInfo { - fn deserialize(_: D) -> Result - where D: Deserializer<'de> { - unreachable!("ConvertGroupingMetaInfo does not support exchanging between multiple nodes") - } -} - -impl ConvertGroupingMetaInfo { - pub fn 
create(bucket: isize, blocks: Vec) -> BlockMetaInfoPtr { - Box::new(ConvertGroupingMetaInfo { bucket, blocks }) - } -} - -#[typetag::serde(name = "convert_grouping")] -impl BlockMetaInfo for ConvertGroupingMetaInfo { - fn as_any(&self) -> &dyn Any { - self - } - - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - - fn clone_self(&self) -> Box { - unimplemented!("Unimplemented clone for ConvertGroupingMetaInfo") - } - - fn equals(&self, _: &Box) -> bool { - unimplemented!("Unimplemented equals for ConvertGroupingMetaInfo") - } -} - -struct InputPortState { - port: Arc, - bucket: isize, -} - -/// A helper class that Map -/// AggregateInfo/AggregateHashStateInfo ---> ConvertGroupingMetaInfo { meta: blocks with Option } -pub struct TransformConvertGrouping { - output: Arc, - inputs: Vec, - - method: Method, - working_bucket: isize, - pushing_bucket: isize, - initialized_all_inputs: bool, - params: Arc, - buckets_blocks: BTreeMap>, - unsplitted_blocks: Vec, -} - -impl TransformConvertGrouping { - pub fn create( - method: Method, - params: Arc, - input_nums: usize, - ) -> Result { - let mut inputs = Vec::with_capacity(input_nums); - - for _index in 0..input_nums { - inputs.push(InputPortState { - bucket: -1, - port: InputPort::create(), - }); - } - - Ok(TransformConvertGrouping { - method, - params, - inputs, - working_bucket: 0, - pushing_bucket: 0, - output: OutputPort::create(), - buckets_blocks: BTreeMap::new(), - unsplitted_blocks: vec![], - initialized_all_inputs: false, - }) - } - - pub fn get_inputs(&self) -> Vec> { - let mut inputs = Vec::with_capacity(self.inputs.len()); - - for input_state in &self.inputs { - inputs.push(input_state.port.clone()); - } - - inputs - } - - pub fn get_output(&self) -> Arc { - self.output.clone() - } - - fn initialize_all_inputs(&mut self) -> Result { - self.initialized_all_inputs = true; - - for index in 0..self.inputs.len() { - if self.inputs[index].port.is_finished() { - continue; - } - - // We pull the first unsplitted data block - if self.inputs[index].bucket > SINGLE_LEVEL_BUCKET_NUM { - continue; - } - - if !self.inputs[index].port.has_data() { - self.inputs[index].port.set_need_data(); - self.initialized_all_inputs = false; - continue; - } - - let data_block = self.inputs[index].port.pull_data().unwrap()?; - self.inputs[index].bucket = self.add_bucket(data_block); - - if self.inputs[index].bucket <= SINGLE_LEVEL_BUCKET_NUM { - self.inputs[index].port.set_need_data(); - self.initialized_all_inputs = false; - } - } - - Ok(self.initialized_all_inputs) - } - - fn add_bucket(&mut self, data_block: DataBlock) -> isize { - if let Some(info) = data_block - .get_meta() - .and_then(|meta| meta.as_any().downcast_ref::()) - { - if info.overflow.is_none() && info.bucket > SINGLE_LEVEL_BUCKET_NUM { - let bucket = info.bucket; - match self.buckets_blocks.entry(bucket) { - Entry::Vacant(v) => { - v.insert(vec![data_block]); - } - Entry::Occupied(mut v) => { - v.get_mut().push(data_block); - } - }; - - return bucket; - } - } - - // check if it's local state - if let Some(info) = data_block - .get_meta() - .and_then(|meta| meta.as_any().downcast_ref::()) - { - let bucket = info.bucket as isize; - match self.buckets_blocks.entry(bucket) { - Entry::Vacant(v) => { - v.insert(vec![data_block]); - } - Entry::Occupied(mut v) => { - v.get_mut().push(data_block); - } - }; - return bucket; - } - - self.unsplitted_blocks.push(data_block); - SINGLE_LEVEL_BUCKET_NUM - } - - fn try_push_data_block(&mut self) -> bool { - match self.buckets_blocks.is_empty() { - true => 
self.try_push_single_level(), - false => self.try_push_two_level(), - } - } - - fn try_push_two_level(&mut self) -> bool { - while self.pushing_bucket < self.working_bucket { - if let Some(bucket_blocks) = self.buckets_blocks.remove(&self.pushing_bucket) { - let meta = ConvertGroupingMetaInfo::create(self.pushing_bucket, bucket_blocks); - self.output.push_data(Ok(DataBlock::empty_with_meta(meta))); - self.pushing_bucket += 1; - return true; - } - - self.pushing_bucket += 1; - } - - false - } - - fn try_push_single_level(&mut self) -> bool { - if self.unsplitted_blocks.is_empty() { - return false; - } - - let unsplitted_blocks = std::mem::take(&mut self.unsplitted_blocks); - let meta = ConvertGroupingMetaInfo::create(SINGLE_LEVEL_BUCKET_NUM, unsplitted_blocks); - self.output.push_data(Ok(DataBlock::empty_with_meta(meta))); - true - } - - fn convert_to_two_level(&self, data_block: DataBlock) -> Result> { - let aggregate_function_len = self.params.aggregate_functions.len(); - let keys_column = data_block - .get_by_offset(aggregate_function_len) - .value - .as_column() - .unwrap(); - let keys_iter = self.method.keys_iter_from_column(keys_column)?; - - let mut indices = Vec::with_capacity(data_block.num_rows()); - - for key_item in keys_iter.iter() { - let hash = self.method.get_hash(key_item); - indices.push(hash2bucket::(hash as usize) as u16); - } - - DataBlock::scatter(&data_block, &indices, 1 << BUCKETS_LG2) - } -} - -#[async_trait::async_trait] -impl Processor for TransformConvertGrouping { - fn name(&self) -> String { - String::from("TransformConvertGrouping") - } - - fn as_any(&mut self) -> &mut dyn Any { - self - } - - fn event(&mut self) -> Result { - if self.output.is_finished() { - for input_state in &self.inputs { - input_state.port.finish(); - } - - self.buckets_blocks.clear(); - return Ok(Event::Finished); - } - - // We pull the first unsplitted data block - if !self.initialized_all_inputs && !self.initialize_all_inputs()? { - return Ok(Event::NeedData); - } - - if !self.buckets_blocks.is_empty() && !self.unsplitted_blocks.is_empty() { - // Split data blocks if it's unsplitted. 
- return Ok(Event::Sync); - } - - if !self.output.can_push() { - for input_state in &self.inputs { - input_state.port.set_not_need_data(); - } - - return Ok(Event::NeedConsume); - } - - let pushed_data_block = self.try_push_data_block(); - - loop { - // Try to pull the next data or until the port is closed - let mut all_inputs_is_finished = true; - let mut all_port_prepared_data = true; - - for index in 0..self.inputs.len() { - if self.inputs[index].port.is_finished() { - continue; - } - - all_inputs_is_finished = false; - if self.inputs[index].bucket > self.working_bucket { - continue; - } - - if !self.inputs[index].port.has_data() { - all_port_prepared_data = false; - self.inputs[index].port.set_need_data(); - continue; - } - - let data_block = self.inputs[index].port.pull_data().unwrap()?; - self.inputs[index].bucket = self.add_bucket(data_block); - debug_assert!(self.unsplitted_blocks.is_empty()); - - if self.inputs[index].bucket <= self.working_bucket { - all_port_prepared_data = false; - self.inputs[index].port.set_need_data(); - } - } - - if all_inputs_is_finished { - break; - } - - if !all_port_prepared_data { - return Ok(Event::NeedData); - } - - self.working_bucket += 1; - } - - if pushed_data_block || self.try_push_data_block() { - return Ok(Event::NeedConsume); - } - - if let Some((bucket, bucket_blocks)) = self.buckets_blocks.pop_first() { - let meta = ConvertGroupingMetaInfo::create(bucket, bucket_blocks); - self.output.push_data(Ok(DataBlock::empty_with_meta(meta))); - return Ok(Event::NeedConsume); - } - - self.output.finish(); - Ok(Event::Finished) - } - - fn process(&mut self) -> Result<()> { - if let Some(data_block) = self.unsplitted_blocks.pop() { - let data_block_meta: Option<&AggregateInfo> = data_block - .get_meta() - .and_then(|meta| meta.as_any().downcast_ref::()); - - let data_blocks = match data_block_meta { - None => self.convert_to_two_level(data_block)?, - Some(meta) => match &meta.overflow { - None => self.convert_to_two_level(data_block)?, - Some(_overflow_info) => unimplemented!(), - }, - }; - - for (bucket, block) in data_blocks.into_iter().enumerate() { - if !block.is_empty() { - match self.buckets_blocks.entry(bucket as isize) { - Entry::Vacant(v) => { - v.insert(vec![block]); - } - Entry::Occupied(mut v) => { - v.get_mut().push(block); - } - }; - } - } - } - - Ok(()) - } -} - -fn build_convert_grouping( - method: Method, - pipeline: &mut Pipeline, - params: Arc, -) -> Result<()> { - let input_nums = pipeline.output_len(); - let transform = TransformConvertGrouping::create(method.clone(), params.clone(), input_nums)?; - - let output = transform.get_output(); - let inputs_port = transform.get_inputs(); - - pipeline.add_pipe(Pipe::create(inputs_port.len(), 1, vec![PipeItem::create( - ProcessorPtr::create(Box::new(transform)), - inputs_port, - vec![output], - )])); - - pipeline.resize(input_nums)?; - - pipeline.add_transform(|input, output| { - MergeBucketTransform::try_create(input, output, method.clone(), params.clone()) - }) -} - -pub fn efficiently_memory_final_aggregator( - params: Arc, - pipeline: &mut Pipeline, -) -> Result<()> { - let group_cols = ¶ms.group_columns; - let schema_before_group_by = params.input_schema.clone(); - let sample_block = DataBlock::empty_with_schema(schema_before_group_by); - let method = DataBlock::choose_hash_method(&sample_block, group_cols)?; - - with_hash_method!(|T| match method { - HashMethodKind::T(v) => build_convert_grouping(v, pipeline, params.clone()), - }) -} - -struct MergeBucketTransform { - method: Method, - 
params: Arc, - - input: Arc, - output: Arc, - - input_block: Option, - output_blocks: Vec, -} - -impl MergeBucketTransform { - pub fn try_create( - input: Arc, - output: Arc, - method: Method, - params: Arc, - ) -> Result { - Ok(ProcessorPtr::create(Box::new(MergeBucketTransform { - input, - output, - method, - params, - input_block: None, - output_blocks: vec![], - }))) - } -} - -#[async_trait::async_trait] -impl Processor for MergeBucketTransform { - fn name(&self) -> String { - String::from("MergeBucketTransform") - } - - fn as_any(&mut self) -> &mut dyn Any { - self - } - - fn event(&mut self) -> Result { - if self.output.is_finished() { - self.input_block.take(); - self.output_blocks.clear(); - self.input.finish(); - return Ok(Event::Finished); - } - - if !self.output.can_push() { - self.input.set_not_need_data(); - return Ok(Event::NeedConsume); - } - - if let Some(output_data) = self.output_blocks.pop() { - self.output.push_data(Ok(output_data)); - return Ok(Event::NeedConsume); - } - - if self.input_block.is_some() { - return Ok(Event::Sync); - } - - if self.input.has_data() { - self.input_block = Some(self.input.pull_data().unwrap()?); - return Ok(Event::Sync); - } - - if self.input.is_finished() { - self.output.finish(); - return Ok(Event::Finished); - } - - self.input.set_need_data(); - Ok(Event::NeedData) - } - - fn process(&mut self) -> Result<()> { - if let Some(mut data_block) = self.input_block.take() { - let mut blocks = vec![]; - if let Some(mut meta) = data_block.take_meta() { - if let Some(meta) = meta.as_mut_any().downcast_mut::() { - std::mem::swap(&mut blocks, &mut meta.blocks); - } - } - - match self.params.aggregate_functions.is_empty() { - true => { - let mut bucket_merger = BucketAggregator::::create( - self.method.clone(), - self.params.clone(), - )?; - - self.output_blocks - .extend(bucket_merger.merge_blocks(blocks)?); - } - false => { - let mut bucket_merger = BucketAggregator::::create( - self.method.clone(), - self.params.clone(), - )?; - - self.output_blocks - .extend(bucket_merger.merge_blocks(blocks)?); - } - }; - } - - Ok(()) - } -} diff --git a/src/query/service/src/sessions/query_ctx_shared.rs b/src/query/service/src/sessions/query_ctx_shared.rs index e459260651830..318e65e1f2c0b 100644 --- a/src/query/service/src/sessions/query_ctx_shared.rs +++ b/src/query/service/src/sessions/query_ctx_shared.rs @@ -338,6 +338,9 @@ impl QueryContextShared { impl Drop for QueryContextShared { fn drop(&mut self) { + // last_query_id() should return the query_id of the last executed statement, + // so we set it when the current context drops + // to avoid returning the query_id of the current statement. 
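// Illustrative example: after `SELECT 1` completes, a following
// `SELECT last_query_id()` should see the id of `SELECT 1`, because the first
// statement's context has already been dropped by the time the second runs.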
self.session .session_ctx .update_query_ids_results(self.init_query_id.read().clone(), None) diff --git a/src/query/sql/src/planner/binder/location.rs b/src/query/sql/src/planner/binder/location.rs index b24b5ef1553a8..07e34a334d3d6 100644 --- a/src/query/sql/src/planner/binder/location.rs +++ b/src/query/sql/src/planner/binder/location.rs @@ -27,6 +27,7 @@ use common_meta_app::storage::StorageIpfsConfig; use common_meta_app::storage::StorageOssConfig; use common_meta_app::storage::StorageParams; use common_meta_app::storage::StorageS3Config; +use common_meta_app::storage::StorageWebhdfsConfig; use common_meta_app::storage::STORAGE_GCS_DEFAULT_ENDPOINT; use common_meta_app::storage::STORAGE_IPFS_DEFAULT_ENDPOINT; use common_meta_app::storage::STORAGE_S3_DEFAULT_ENDPOINT; @@ -264,6 +265,41 @@ fn parse_hdfs_params(l: &mut UriLocation) -> Result { Ok(sp) } +// The FileSystem scheme of WebHDFS is “webhdfs://”. A WebHDFS FileSystem URI has the following format. +// webhdfs://:/ +fn parse_webhdfs_params(l: &mut UriLocation) -> Result { + let is_https = l + .connection + .get("https") + .map(|s| s.parse::()) + .unwrap_or(Ok(true)) + .map_err(|e| { + Error::new( + ErrorKind::InvalidInput, + format!( + "HTTPS should be `TRUE` or `FALSE`, parse error with: {:?}", + e, + ), + ) + })?; + let prefix = if is_https { "https" } else { "http" }; + let endpoint_url = format!("{prefix}://{}", l.name); + + let root = l.path.clone(); + + let delegation = l.connection.get("delegation").cloned().unwrap_or_default(); + + let sp = StorageParams::Webhdfs(StorageWebhdfsConfig { + endpoint_url, + root, + delegation, + }); + + l.connection.check()?; + + Ok(sp) +} + /// parse_uri_location will parse given UriLocation into StorageParams and Path. pub fn parse_uri_location(l: &mut UriLocation) -> Result<(StorageParams, String)> { // Path endswith `/` means it's a directory, otherwise it's a file. @@ -321,6 +357,7 @@ pub fn parse_uri_location(l: &mut UriLocation) -> Result<(StorageParams, String) let cfg = StorageFsConfig { root }; StorageParams::Fs(cfg) } + Scheme::Webhdfs => parse_webhdfs_params(l)?, v => { return Err(Error::new( ErrorKind::InvalidInput, diff --git a/src/query/sql/src/planner/optimizer/distributed/distributed.rs b/src/query/sql/src/planner/optimizer/distributed/distributed.rs index c46eef721a45c..a775781217acb 100644 --- a/src/query/sql/src/planner/optimizer/distributed/distributed.rs +++ b/src/query/sql/src/planner/optimizer/distributed/distributed.rs @@ -31,7 +31,7 @@ pub fn optimize_distributed_query(ctx: Arc, s_expr: &SExpr) -> distribution: Distribution::Any, }; let mut result = require_property(ctx, &required, s_expr)?; - push_down_topk_to_merge(&mut result, None)?; + result = push_down_topk_to_merge(&result, None)?; let rel_expr = RelExpr::with_s_expr(&result); let physical_prop = rel_expr.derive_physical_prop()?; let root_required = RequiredProperty { @@ -46,30 +46,38 @@ pub fn optimize_distributed_query(ctx: Arc, s_expr: &SExpr) -> } // Traverse the SExpr tree to find top_k, if find, push down it to Exchange::Merge -fn push_down_topk_to_merge(s_expr: &mut SExpr, mut top_k: Option) -> Result<()> { +fn push_down_topk_to_merge(s_expr: &SExpr, mut top_k: Option) -> Result { if let RelOperator::Exchange(Exchange::Merge) = s_expr.plan { // A quick fix for Merge child is aggregate. // Todo: consider to push down topk to the above of aggregate. 
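// Note: SExpr::children is now an Arc<Vec<SExpr>> (see the s_expr.rs hunk further
// down), so this pass can no longer mutate children through `&mut`. Instead it clones
// the affected child, wraps it in the Sort (top-k) operator, and rebuilds the parent
// with replace_children, returning a new SExpr at every level of the recursion.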
if let RelOperator::Aggregate(_) = s_expr.child(0)?.plan { - return Ok(()); + return Ok(s_expr.clone()); } if let Some(top_k) = top_k { - let child = &mut s_expr.children[0]; - *child = SExpr::create_unary(top_k.sort.into(), child.clone()); + let mut child = s_expr.children[0].clone(); + child = SExpr::create_unary(top_k.sort.into(), child.clone()); + let children = if s_expr.children.len() == 2 { + vec![child, s_expr.children[1].clone()] + } else { + vec![child] + }; + return Ok(s_expr.replace_children(children)); } - return Ok(()); } - for child in s_expr.children.iter_mut() { + let mut s_expr_children = vec![]; + for child in s_expr.children.iter() { top_k = None; if let RelOperator::Sort(sort) = &child.plan { if sort.limit.is_some() { top_k = Some(TopK { sort: sort.clone() }); } } - for child_child in child.children.iter_mut() { - push_down_topk_to_merge(child_child, top_k.clone())?; + let mut new_children = vec![]; + for child_child in child.children.iter() { + new_children.push(push_down_topk_to_merge(child_child, top_k.clone())?); } + s_expr_children.push(child.replace_children(new_children)); } - Ok(()) + Ok(s_expr.replace_children(s_expr_children)) } diff --git a/src/query/sql/src/planner/optimizer/s_expr.rs b/src/query/sql/src/planner/optimizer/s_expr.rs index a9e2f93d56af3..2916243bba73e 100644 --- a/src/query/sql/src/planner/optimizer/s_expr.rs +++ b/src/query/sql/src/planner/optimizer/s_expr.rs @@ -32,7 +32,7 @@ use crate::ScalarExpr; #[derive(Clone, Debug)] pub struct SExpr { pub(crate) plan: RelOperator, - pub(crate) children: Vec, + pub(crate) children: Arc>, pub(crate) original_group: Option, @@ -58,7 +58,7 @@ impl SExpr { ) -> Self { SExpr { plan, - children, + children: Arc::new(children), original_group, rel_prop: Arc::new(Mutex::new(rel_prop)), @@ -104,12 +104,6 @@ impl SExpr { .ok_or_else(|| ErrorCode::Internal(format!("Invalid children index: {}", n))) } - pub fn child_mut(&mut self, n: usize) -> Result<&mut SExpr> { - self.children - .get_mut(n) - .ok_or_else(|| ErrorCode::Internal(format!("Invalid children index: {}", n))) - } - pub fn arity(&self) -> usize { self.children.len() } @@ -154,7 +148,7 @@ impl SExpr { original_group: None, rel_prop: Arc::new(Mutex::new(None)), applied_rules: self.applied_rules.clone(), - children, + children: Arc::new(children), } } diff --git a/src/query/storages/common/table-meta/src/meta/v2/segment.rs b/src/query/storages/common/table-meta/src/meta/v2/segment.rs index 8529ea8939cdd..457aad6116bff 100644 --- a/src/query/storages/common/table-meta/src/meta/v2/segment.rs +++ b/src/query/storages/common/table-meta/src/meta/v2/segment.rs @@ -13,6 +13,7 @@ // limitations under the License. 
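The s_expr.rs and distributed.rs hunks above make SExpr children shared and immutable (Arc<Vec<SExpr>>), so push_down_topk_to_merge can no longer mutate a child in place and instead rebuilds the affected nodes with replace_children and returns a new tree. A minimal sketch of that rebuild-instead-of-mutate recursion, with a toy Node type standing in for SExpr:

use std::sync::Arc;

#[derive(Clone)]
struct Node {
    label: String,
    children: Arc<Vec<Node>>,
}

impl Node {
    fn new(label: &str, children: Vec<Node>) -> Self {
        Node { label: label.to_string(), children: Arc::new(children) }
    }

    // Same shape as SExpr::replace_children: a new node is built, while untouched
    // subtrees keep sharing the old Arc, so only the rewritten path allocates.
    fn replace_children(&self, children: Vec<Node>) -> Self {
        Node { label: self.label.clone(), children: Arc::new(children) }
    }
}

// Wrap every child of a "merge" node in a marker node, returning the new tree.
fn push_down(node: &Node) -> Node {
    if node.label == "merge" {
        let wrapped: Vec<Node> = node
            .children
            .iter()
            .map(|c| Node::new("topk", vec![c.clone()]))
            .collect();
        return node.replace_children(wrapped);
    }
    let new_children: Vec<Node> = node.children.iter().map(push_down).collect();
    node.replace_children(new_children)
}

fn main() {
    let tree = Node::new("exchange", vec![Node::new("merge", vec![Node::new("scan", vec![])])]);
    let rewritten = push_down(&tree);
    assert_eq!(rewritten.children[0].children[0].label, "topk");
}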
use std::collections::HashMap; +use std::ops::Range; use std::sync::Arc; use common_arrow::native::ColumnMeta as NativeColumnMeta; @@ -114,6 +115,38 @@ impl ColumnMeta { ColumnMeta::Native(v) => (v.offset, v.pages.iter().map(|page| page.length).sum()), } } + + pub fn read_rows(&self, range: &Option>) -> u64 { + match self { + ColumnMeta::Parquet(v) => v.num_values, + ColumnMeta::Native(v) => match range { + Some(range) => v + .pages + .iter() + .skip(range.start) + .take(range.end - range.start) + .map(|page| page.num_values) + .sum(), + None => v.pages.iter().map(|page| page.num_values).sum(), + }, + } + } + + pub fn read_bytes(&self, range: &Option>) -> u64 { + match self { + ColumnMeta::Parquet(v) => v.len, + ColumnMeta::Native(v) => match range { + Some(range) => v + .pages + .iter() + .skip(range.start) + .take(range.end - range.start) + .map(|page| page.length) + .sum(), + None => v.pages.iter().map(|page| page.length).sum(), + }, + } + } } impl SegmentInfo { diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 5d272818bc427..95fd9b9b6192a 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -187,6 +187,15 @@ impl FuseTable { } } + /// Get max page size. + /// For native storage format. + pub fn get_max_page_size(&self) -> Option { + match self.storage_format { + FuseStorageFormat::Parquet => None, + FuseStorageFormat::Native => Some(self.get_write_settings().max_page_size), + } + } + pub fn parse_storage_prefix(table_info: &TableInfo) -> Result { let table_id = table_info.ident.table_id; let db_id = table_info diff --git a/src/query/storages/fuse/src/operations/append.rs b/src/query/storages/fuse/src/operations/append.rs index 1a2a8d3e4c375..9b75e52e7385f 100644 --- a/src/query/storages/fuse/src/operations/append.rs +++ b/src/query/storages/fuse/src/operations/append.rs @@ -72,11 +72,7 @@ impl FuseTable { } } - let max_page_size = if self.is_native() { - Some(write_settings.max_page_size) - } else { - None - }; + let max_page_size = self.get_max_page_size(); let cluster_stats_gen = self.get_cluster_stats_gen( ctx.clone(), max_page_size, diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 68255fea4b270..00a08dd71345a 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -371,12 +371,7 @@ impl FuseTable { cluster_key_index.push(index); } - let max_page_size = if self.is_native() { - Some(self.get_write_settings().max_page_size) - } else { - None - }; - + let max_page_size = self.get_max_page_size(); Ok(ClusterStatsGenerator::new( self.cluster_key_meta.as_ref().unwrap().0, cluster_key_index, diff --git a/src/query/storages/fuse/src/operations/mutation/compact/compact_meta.rs b/src/query/storages/fuse/src/operations/mutation/compact/compact_meta.rs index 31d4cf7953fd7..5f7355e17dec2 100644 --- a/src/query/storages/fuse/src/operations/mutation/compact/compact_meta.rs +++ b/src/query/storages/fuse/src/operations/mutation/compact/compact_meta.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; use common_expression::BlockMetaInfo; +use common_expression::BlockMetaInfoDowncast; use common_expression::BlockMetaInfoPtr; use storages_common_table_meta::meta::SegmentInfo; @@ -37,20 +38,16 @@ impl BlockMetaInfo for CompactSourceMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - - fn 
clone_self(&self) -> Box { - Box::new(self.clone()) - } - fn equals(&self, info: &Box) -> bool { - match info.as_any().downcast_ref::() { + match CompactSourceMeta::downcast_ref_from(info) { None => false, Some(other) => self == other, } } + + fn clone_self(&self) -> Box { + Box::new(self.clone()) + } } impl CompactSourceMeta { @@ -59,7 +56,7 @@ impl CompactSourceMeta { } pub fn from_meta(info: &BlockMetaInfoPtr) -> Result<&CompactSourceMeta> { - match info.as_any().downcast_ref::() { + match CompactSourceMeta::downcast_ref_from(info) { Some(part_ref) => Ok(part_ref), None => Err(ErrorCode::Internal( "Cannot downcast from BlockMetaInfo to CompactSourceMeta.", @@ -82,20 +79,16 @@ impl BlockMetaInfo for CompactSinkMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - - fn clone_self(&self) -> Box { - Box::new(self.clone()) - } - fn equals(&self, info: &Box) -> bool { - match info.as_any().downcast_ref::() { + match CompactSinkMeta::downcast_ref_from(info) { None => false, Some(other) => self == other, } } + + fn clone_self(&self) -> Box { + Box::new(self.clone()) + } } impl CompactSinkMeta { @@ -114,7 +107,7 @@ impl CompactSinkMeta { } pub fn from_meta(info: &BlockMetaInfoPtr) -> Result<&CompactSinkMeta> { - match info.as_any().downcast_ref::() { + match CompactSinkMeta::downcast_ref_from(info) { Some(part_ref) => Ok(part_ref), None => Err(ErrorCode::Internal( "Cannot downcast from BlockMetaInfo to CompactSinkMeta.", diff --git a/src/query/storages/fuse/src/operations/mutation/mutation_meta.rs b/src/query/storages/fuse/src/operations/mutation/mutation_meta.rs index f079cb816e3c2..4b80eda13e903 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutation_meta.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutation_meta.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; use common_expression::BlockMetaInfo; +use common_expression::BlockMetaInfoDowncast; use common_expression::BlockMetaInfoPtr; use storages_common_pruner::BlockMetaIndex; use storages_common_table_meta::meta::BlockMeta; @@ -39,20 +40,16 @@ impl BlockMetaInfo for SerializeDataMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - - fn clone_self(&self) -> Box { - Box::new(self.clone()) - } - fn equals(&self, info: &Box) -> bool { - match info.as_any().downcast_ref::() { + match SerializeDataMeta::downcast_ref_from(info) { None => false, Some(other) => self == other, } } + + fn clone_self(&self) -> Box { + Box::new(self.clone()) + } } impl SerializeDataMeta { @@ -67,7 +64,7 @@ impl SerializeDataMeta { } pub fn from_meta(info: &BlockMetaInfoPtr) -> Result<&SerializeDataMeta> { - match info.as_any().downcast_ref::() { + match SerializeDataMeta::downcast_ref_from(info) { Some(part_ref) => Ok(part_ref), None => Err(ErrorCode::Internal( "Cannot downcast from BlockMetaInfo to SerializeDataMeta.", @@ -95,20 +92,16 @@ impl BlockMetaInfo for MutationTransformMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - - fn clone_self(&self) -> Box { - Box::new(self.clone()) - } - fn equals(&self, info: &Box) -> bool { - match info.as_any().downcast_ref::() { + match MutationTransformMeta::downcast_ref_from(info) { None => false, Some(other) => self == other, } } + + fn clone_self(&self) -> Box { + Box::new(self.clone()) + } } impl MutationTransformMeta { @@ -117,7 +110,7 @@ impl MutationTransformMeta { } pub fn from_meta(info: &BlockMetaInfoPtr) -> Result<&MutationTransformMeta> { - match info.as_any().downcast_ref::() { + 
match MutationTransformMeta::downcast_ref_from(info) { Some(part_ref) => Ok(part_ref), None => Err(ErrorCode::Internal( "Cannot downcast from BlockMetaInfo to MutationTransformMeta.", @@ -139,20 +132,16 @@ impl BlockMetaInfo for MutationSinkMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - - fn clone_self(&self) -> Box { - Box::new(self.clone()) - } - fn equals(&self, info: &Box) -> bool { - match info.as_any().downcast_ref::() { + match MutationSinkMeta::downcast_ref_from(info) { None => false, Some(other) => self == other, } } + + fn clone_self(&self) -> Box { + Box::new(self.clone()) + } } impl MutationSinkMeta { @@ -169,7 +158,7 @@ impl MutationSinkMeta { } pub fn from_meta(info: &BlockMetaInfoPtr) -> Result<&MutationSinkMeta> { - match info.as_any().downcast_ref::() { + match MutationSinkMeta::downcast_ref_from(info) { Some(part_ref) => Ok(part_ref), None => Err(ErrorCode::Internal( "Cannot downcast from BlockMetaInfo to MutationSinkMeta.", diff --git a/src/query/storages/fuse/src/operations/operation_log.rs b/src/query/storages/fuse/src/operations/operation_log.rs index 0fbd1323cf364..e88362005ce68 100644 --- a/src/query/storages/fuse/src/operations/operation_log.rs +++ b/src/query/storages/fuse/src/operations/operation_log.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_expression::BlockMetaInfo; +use common_expression::BlockMetaInfoDowncast; use common_expression::DataBlock; use storages_common_table_meta::meta::SegmentInfo; @@ -57,7 +58,7 @@ impl TryFrom<&DataBlock> for AppendOperationLogEntry { )); if let Some(meta) = block.get_meta() { - let cast = meta.as_any().downcast_ref::(); + let cast = AppendOperationLogEntry::downcast_ref_from(meta); return match cast { None => Err(err), Some(entry) => Ok(entry.clone()), @@ -74,18 +75,14 @@ impl BlockMetaInfo for AppendOperationLogEntry { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } - - fn clone_self(&self) -> Box { - Box::new(self.clone()) - } - fn equals(&self, info: &Box) -> bool { - match info.as_any().downcast_ref::() { + match AppendOperationLogEntry::downcast_ref_from(info) { None => false, Some(other) => self == other, } } + + fn clone_self(&self) -> Box { + Box::new(self.clone()) + } } diff --git a/src/query/storages/fuse/src/operations/read/fuse_source.rs b/src/query/storages/fuse/src/operations/read/fuse_source.rs index e537c57c616a2..dbc1cb73fd8e7 100644 --- a/src/query/storages/fuse/src/operations/read/fuse_source.rs +++ b/src/query/storages/fuse/src/operations/read/fuse_source.rs @@ -26,6 +26,7 @@ use common_pipeline_core::Pipeline; use common_pipeline_core::SourcePipeBuilder; use tracing::info; +use crate::fuse_part::FusePartInfo; use crate::io::BlockReader; use crate::operations::read::native_data_source_deserializer::NativeDeserializeDataTransform; use crate::operations::read::native_data_source_reader::ReadNativeDataSource; @@ -41,7 +42,8 @@ pub fn build_fuse_native_source_pipeline( topk: Option, mut max_io_requests: usize, ) -> Result<()> { - (max_threads, max_io_requests) = adjust_threads_and_request(max_threads, max_io_requests, plan); + (max_threads, max_io_requests) = + adjust_threads_and_request(true, max_threads, max_io_requests, plan); if topk.is_some() { max_threads = max_threads.min(16); @@ -122,7 +124,8 @@ pub fn build_fuse_parquet_source_pipeline( mut max_threads: usize, mut max_io_requests: usize, ) -> Result<()> { - (max_threads, max_io_requests) = adjust_threads_and_request(max_threads, max_io_requests, plan); + (max_threads, 
max_io_requests) = + adjust_threads_and_request(false, max_threads, max_io_requests, plan); let mut source_builder = SourcePipeBuilder::create(); @@ -220,12 +223,38 @@ pub fn dispatch_partitions( } pub fn adjust_threads_and_request( + is_native: bool, mut max_threads: usize, mut max_io_requests: usize, plan: &DataSourcePlan, ) -> (usize, usize) { if !plan.parts.is_lazy { - let block_nums = plan.parts.partitions.len().max(1); + let mut block_nums = plan.parts.partitions.len(); + + // If the read bytes of a partition is small enough, less than 16k rows + // we will not use an extra heavy thread to process it. + // now only works for native reader + static MIN_ROWS_READ_PER_THREAD: u64 = 16 * 1024; + if is_native { + plan.parts.partitions.iter().for_each(|part| { + if let Some(part) = part.as_any().downcast_ref::() { + let to_read_rows = part + .columns_meta + .values() + .map(|meta| meta.read_rows(&part.range)) + .find(|rows| *rows > 0) + .unwrap_or(part.nums_rows as u64); + + if to_read_rows < MIN_ROWS_READ_PER_THREAD { + block_nums -= 1; + } + } + }); + } + + // At least max(1/8 of the original parts, 1), in case of too many small partitions but io threads is just one. + block_nums = std::cmp::max(block_nums, plan.parts.partitions.len() / 8); + block_nums = std::cmp::max(block_nums, 1); max_threads = std::cmp::min(max_threads, block_nums); max_io_requests = std::cmp::min(max_io_requests, block_nums); diff --git a/src/query/storages/fuse/src/operations/read/native_data_source.rs b/src/query/storages/fuse/src/operations/read/native_data_source.rs index a7d864d330924..2213312944a77 100644 --- a/src/query/storages/fuse/src/operations/read/native_data_source.rs +++ b/src/query/storages/fuse/src/operations/read/native_data_source.rs @@ -67,15 +67,11 @@ impl BlockMetaInfo for NativeDataSourceMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self + fn equals(&self, _: &Box) -> bool { + unimplemented!("Unimplemented equals NativeDataSourceMeta") } fn clone_self(&self) -> Box { unimplemented!("Unimplemented clone NativeDataSourceMeta") } - - fn equals(&self, _: &Box) -> bool { - unimplemented!("Unimplemented equals NativeDataSourceMeta") - } } diff --git a/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs b/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs index 4fa307c99a4de..b8512e69bace7 100644 --- a/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs @@ -34,6 +34,7 @@ use common_exception::Result; use common_expression::filter_helper::FilterHelpers; use common_expression::types::BooleanType; use common_expression::BlockEntry; +use common_expression::BlockMetaInfoDowncast; use common_expression::Column; use common_expression::DataBlock; use common_expression::DataSchema; @@ -370,13 +371,10 @@ impl Processor for NativeDeserializeDataTransform { if self.input.has_data() { let mut data_block = self.input.pull_data().unwrap()?; - if let Some(mut source_meta) = data_block.take_meta() { - if let Some(source_meta) = source_meta - .as_mut_any() - .downcast_mut::() - { - self.parts = VecDeque::from(std::mem::take(&mut source_meta.part)); - self.chunks = VecDeque::from(std::mem::take(&mut source_meta.chunks)); + if let Some(block_meta) = data_block.take_meta() { + if let Some(source_meta) = NativeDataSourceMeta::downcast_from(block_meta) { + self.parts = VecDeque::from(source_meta.part); + self.chunks = 
VecDeque::from(source_meta.chunks); return Ok(Event::Sync); } } diff --git a/src/query/storages/fuse/src/operations/read/parquet_data_source.rs b/src/query/storages/fuse/src/operations/read/parquet_data_source.rs index e6b717adcc5ed..8d2297f0e134f 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_data_source.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source.rs @@ -64,15 +64,11 @@ impl BlockMetaInfo for DataSourceMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self + fn equals(&self, _: &Box) -> bool { + unimplemented!("Unimplemented equals DataSourceMeta") } fn clone_self(&self) -> Box { unimplemented!("Unimplemented clone DataSourceMeta") } - - fn equals(&self, _: &Box) -> bool { - unimplemented!("Unimplemented equals DataSourceMeta") - } } diff --git a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs index b6e0992fedc74..a4671e8af96ed 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs @@ -21,6 +21,7 @@ use common_base::base::ProgressValues; use common_catalog::plan::PartInfoPtr; use common_catalog::table_context::TableContext; use common_exception::Result; +use common_expression::BlockMetaInfoDowncast; use common_expression::DataBlock; use common_pipeline_core::processors::port::InputPort; use common_pipeline_core::processors::port::OutputPort; @@ -108,11 +109,10 @@ impl Processor for DeserializeDataTransform { if self.input.has_data() { let mut data_block = self.input.pull_data().unwrap()?; - if let Some(mut source_meta) = data_block.take_meta() { - if let Some(source_meta) = source_meta.as_mut_any().downcast_mut::() - { - self.parts = source_meta.part.clone(); - self.chunks = std::mem::take(&mut source_meta.data); + if let Some(source_meta) = data_block.take_meta() { + if let Some(source_meta) = DataSourceMeta::downcast_from(source_meta) { + self.parts = source_meta.part; + self.chunks = source_meta.data; return Ok(Event::Sync); } } diff --git a/src/query/storages/fuse/src/operations/recluster.rs b/src/query/storages/fuse/src/operations/recluster.rs index fc87ff7558e06..39dd6d70ba21d 100644 --- a/src/query/storages/fuse/src/operations/recluster.rs +++ b/src/query/storages/fuse/src/operations/recluster.rs @@ -138,11 +138,7 @@ impl FuseTable { // ReadDataKind to avoid OOM. 
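Several hunks above swap as_any()/as_mut_any() downcast chains for BlockMetaInfoDowncast helpers: downcast_ref_from borrows the concrete meta, while downcast_from takes it by value, which is why the std::mem::take calls disappear from the deserializers. The sketch below shows how such helpers can be layered over Any; the trait and type names are hypothetical stand-ins, not the real Databend definitions.

use std::any::Any;

// Stand-in for BlockMetaInfo: all the downcast helpers need is access to Any.
trait Meta {
    fn as_any(&self) -> &dyn Any;
    fn into_any(self: Box<Self>) -> Box<dyn Any>;
}

// Stand-in for BlockMetaInfoDowncast: the caller names the target type once,
// e.g. SourceMeta::downcast_ref_from(meta), instead of chaining as_any() calls.
trait MetaDowncast: Sized {
    fn downcast_ref_from(meta: &dyn Meta) -> Option<&Self>;
    fn downcast_from(meta: Box<dyn Meta>) -> Option<Self>;
}

impl<T: 'static> MetaDowncast for T {
    fn downcast_ref_from(meta: &dyn Meta) -> Option<&Self> {
        meta.as_any().downcast_ref::<T>()
    }

    // Owned downcast: hands the concrete struct back by value, so fields can be
    // moved out directly rather than swapped or taken through a &mut borrow.
    fn downcast_from(meta: Box<dyn Meta>) -> Option<Self> {
        meta.into_any().downcast::<T>().ok().map(|boxed| *boxed)
    }
}

struct SourceMeta {
    parts: Vec<u64>,
}

impl Meta for SourceMeta {
    fn as_any(&self) -> &dyn Any { self }
    fn into_any(self: Box<Self>) -> Box<dyn Any> { self }
}

fn main() {
    let meta: Box<dyn Meta> = Box::new(SourceMeta { parts: vec![1, 2, 3] });
    assert!(SourceMeta::downcast_ref_from(meta.as_ref()).is_some());
    let owned = SourceMeta::downcast_from(meta).unwrap();
    assert_eq!(owned.parts, vec![1, 2, 3]);
}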
self.do_read_data(ctx.clone(), &plan, pipeline)?; - let max_page_size = if self.is_native() { - Some(self.get_write_settings().max_page_size) - } else { - None - }; + let max_page_size = self.get_max_page_size(); let cluster_stats_gen = self.get_cluster_stats_gen( ctx.clone(), diff --git a/src/query/storages/parquet/src/deserialize_transform.rs b/src/query/storages/parquet/src/deserialize_transform.rs index 9d58ba2140e77..be6ef63c9aeb5 100644 --- a/src/query/storages/parquet/src/deserialize_transform.rs +++ b/src/query/storages/parquet/src/deserialize_transform.rs @@ -28,6 +28,7 @@ use common_expression::filter_helper::FilterHelpers; use common_expression::types::BooleanType; use common_expression::types::DataType; use common_expression::BlockEntry; +use common_expression::BlockMetaInfoDowncast; use common_expression::DataBlock; use common_expression::DataSchemaRef; use common_expression::Evaluator; @@ -53,7 +54,8 @@ pub struct ParquetPrewhereInfo { pub func_ctx: FunctionContext, pub reader: Arc, pub filter: Expr, - pub top_k: Option<(usize, TopKSorter)>, /* the usize is the index of the column in ParquetReader.schema */ + pub top_k: Option<(usize, TopKSorter)>, + // the usize is the index of the column in ParquetReader.schema } pub struct ParquetDeserializeTransform { @@ -187,13 +189,10 @@ impl Processor for ParquetDeserializeTransform { if self.input.has_data() { let mut data_block = self.input.pull_data().unwrap()?; - let mut source_meta = data_block.take_meta().unwrap(); - let source_meta = source_meta - .as_mut_any() - .downcast_mut::() - .unwrap(); + let source_meta = data_block.take_meta().unwrap(); + let source_meta = ParquetSourceMeta::downcast_from(source_meta).unwrap(); - self.parts = VecDeque::from(std::mem::take(&mut source_meta.parts)); + self.parts = VecDeque::from(source_meta.parts); self.check_topn(); if self.top_k_finished { @@ -202,7 +201,7 @@ impl Processor for ParquetDeserializeTransform { return Ok(Event::Finished); } - self.data_readers = VecDeque::from(std::mem::take(&mut source_meta.readers)); + self.data_readers = VecDeque::from(source_meta.readers); return Ok(Event::Sync); } diff --git a/src/query/storages/parquet/src/parquet_source.rs b/src/query/storages/parquet/src/parquet_source.rs index 859c7e0da8ad5..a57fdd24cfb14 100644 --- a/src/query/storages/parquet/src/parquet_source.rs +++ b/src/query/storages/parquet/src/parquet_source.rs @@ -69,17 +69,13 @@ impl BlockMetaInfo for ParquetSourceMeta { self } - fn as_mut_any(&mut self) -> &mut dyn Any { - self + fn equals(&self, _: &Box) -> bool { + unimplemented!("Unimplemented equals ParquetSourceMeta") } fn clone_self(&self) -> Box { unimplemented!("Unimplemented clone ParquetSourceMeta") } - - fn equals(&self, _: &Box) -> bool { - unimplemented!("Unimplemented equals ParquetSourceMeta") - } } pub struct SyncParquetSource { diff --git a/tests/meta-upgrade-09/test-meta-upgrade-09.sh b/tests/meta-upgrade-09/test-meta-upgrade-09.sh index 96e40516a0e25..7f5587d5d1dab 100755 --- a/tests/meta-upgrade-09/test-meta-upgrade-09.sh +++ b/tests/meta-upgrade-09/test-meta-upgrade-09.sh @@ -38,10 +38,10 @@ fi echo " === check ver" ./target/${BUILD_PROFILE}/databend-meta-upgrade-09 --cmd print --raft-dir "$meta_dir" -count_of_v29=$(./target/${BUILD_PROFILE}/databend-meta-upgrade-09 --cmd print --raft-dir "$meta_dir" | grep ' ver: 29' | wc -l) -if [ "$count_of_table_meta" == "$count_of_v29" ]; then - echo " === count of ver=29: $count_of_v29; OK" +count_of_v30=$(./target/${BUILD_PROFILE}/databend-meta-upgrade-09 --cmd print 
--raft-dir "$meta_dir" | grep ' ver: 30' | wc -l) +if [ "$count_of_table_meta" == "$count_of_v30" ]; then + echo " === count of ver=30: $count_of_v30; OK" else - echo " === mismatching lines of ver=29: expect: $count_of_table_meta; got: $count_of_v29" + echo " === mismatching lines of ver=30: expect: $count_of_table_meta; got: $count_of_v30" exit 1 fi diff --git a/tests/sqllogictests/src/client/clickhouse_client.rs b/tests/sqllogictests/src/client/clickhouse_client.rs index 02619b9648e44..fa43e61d9583b 100644 --- a/tests/sqllogictests/src/client/clickhouse_client.rs +++ b/tests/sqllogictests/src/client/clickhouse_client.rs @@ -86,7 +86,7 @@ impl ClickhouseHttpClient { if self.debug { println!( - "Running sql with clickhouse client: [{sql}] ({:?})", + "Running sql with clickhouse http handler: [{sql}] ({:?})", start.elapsed() ); } diff --git a/tests/sqllogictests/src/error.rs b/tests/sqllogictests/src/error.rs index 15936c59818eb..8698d08858dc9 100644 --- a/tests/sqllogictests/src/error.rs +++ b/tests/sqllogictests/src/error.rs @@ -28,72 +28,30 @@ pub type Result = std::result::Result; pub enum DSqlLogicTestError { // Error from sqllogictest-rs #[error("SqlLogicTest error(from sqllogictest-rs crate): {0}")] - SqlLogicTest(TestError), + SqlLogicTest(#[from] TestError), // Error from databend #[error("Databend error: {0}")] - Databend(ErrorCode), + Databend(#[from] ErrorCode), // Error from mysql client #[error("mysql client error: {0}")] - MysqlClient(MysqlClientError), + MysqlClient(#[from] MysqlClientError), // Error from http client #[error("Http client error(from reqwest crate): {0}")] - HttpClient(HttpClientError), + HttpClient(#[from] HttpClientError), // Error from WalkDir #[error("Walk dir error: {0}")] - WalkDir(WalkDirError), + WalkDir(#[from] WalkDirError), // Error from IOError #[error("io error: {0}")] - IO(IOError), + IO(#[from] IOError), // Error from serde json #[error("Serde json error: {0}")] - SerdeJson(SerdeJsonError), + SerdeJson(#[from] SerdeJsonError), // Error from databend sqllogictests #[error("Databend sqllogictests error: {0}")] SelfError(String), } -impl From for DSqlLogicTestError { - fn from(value: TestError) -> Self { - DSqlLogicTestError::SqlLogicTest(value) - } -} - -impl From for DSqlLogicTestError { - fn from(value: ErrorCode) -> Self { - DSqlLogicTestError::Databend(value) - } -} - -impl From for DSqlLogicTestError { - fn from(value: MysqlClientError) -> Self { - DSqlLogicTestError::MysqlClient(value) - } -} - -impl From for DSqlLogicTestError { - fn from(value: HttpClientError) -> Self { - DSqlLogicTestError::HttpClient(value) - } -} - -impl From for DSqlLogicTestError { - fn from(value: WalkDirError) -> Self { - DSqlLogicTestError::WalkDir(value) - } -} - -impl From for DSqlLogicTestError { - fn from(value: IOError) -> Self { - DSqlLogicTestError::IO(value) - } -} - -impl From for DSqlLogicTestError { - fn from(value: SerdeJsonError) -> Self { - DSqlLogicTestError::SerdeJson(value) - } -} - impl From for DSqlLogicTestError { fn from(value: String) -> Self { DSqlLogicTestError::SelfError(value) diff --git a/tests/sqllogictests/suites/base/01_system/01_0011_system_query_cache b/tests/sqllogictests/suites/base/01_system/01_0011_system_query_cache index bf2ee2bb372be..0525833ac22b5 100644 --- a/tests/sqllogictests/suites/base/01_system/01_0011_system_query_cache +++ b/tests/sqllogictests/suites/base/01_system/01_0011_system_query_cache @@ -24,9 +24,30 @@ SELECT num_rows FROM system.query_cache; ---- 3 +statement ok +INSERT INTO t1 VALUES (4); + +statement 
ok +SELECT * FROM t1; + +# tables under system db should not be cached +statement ok +SET query_result_cache_allow_inconsistent = 1; + +query I +SELECT num_rows FROM system.query_cache; +---- +4 + +statement ok +SET query_result_cache_allow_inconsistent = 0; + statement ok SET enable_query_result_cache = 0; +statement ok +truncate table system.query_cache; + statement ok DROP TABLE t1; diff --git a/tests/sqllogictests/suites/base/03_dml/03_0000_select_aliases b/tests/sqllogictests/suites/base/03_common/03_0000_select_aliases similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0000_select_aliases rename to tests/sqllogictests/suites/base/03_common/03_0000_select_aliases diff --git a/tests/sqllogictests/suites/base/03_dml/03_0001_select_aggregator b/tests/sqllogictests/suites/base/03_common/03_0001_select_aggregator similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0001_select_aggregator rename to tests/sqllogictests/suites/base/03_common/03_0001_select_aggregator diff --git a/tests/sqllogictests/suites/base/03_dml/03_0002_select_aggregator_with_filter b/tests/sqllogictests/suites/base/03_common/03_0002_select_aggregator_with_filter similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0002_select_aggregator_with_filter rename to tests/sqllogictests/suites/base/03_common/03_0002_select_aggregator_with_filter diff --git a/tests/sqllogictests/suites/base/03_dml/03_0003_select_group_by b/tests/sqllogictests/suites/base/03_common/03_0003_select_group_by similarity index 89% rename from tests/sqllogictests/suites/base/03_dml/03_0003_select_group_by rename to tests/sqllogictests/suites/base/03_common/03_0003_select_group_by index 4d4c5f69899c6..be9d87f7abfd7 100644 --- a/tests/sqllogictests/suites/base/03_dml/03_0003_select_group_by +++ b/tests/sqllogictests/suites/base/03_common/03_0003_select_group_by @@ -167,3 +167,25 @@ select count() from numbers(10) group by to_nullable('ab') statement ok DROP TABLE t_array +## Decimal + +query III +select (number%3 + 1) c , (number%3 + 2) d, count() from numbers(100) group by c,d order by c,d; +---- +1 2 34 +2 3 33 +3 4 33 + +query II +select (number % 3)::Decimal(19, 2) c , count() from numbers(100) group by c order by c; +---- +0.00 34 +1.00 33 +2.00 33 + +query ITI +select (number % 3)::Decimal(19, 2) c, to_string(number % 3) d, count() from numbers(100) group by c,d order by c, d; +---- +0.00 0 34 +1.00 1 33 +2.00 2 33 diff --git a/tests/sqllogictests/suites/base/03_dml/03_0004_select_order_by b/tests/sqllogictests/suites/base/03_common/03_0004_select_order_by similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0004_select_order_by rename to tests/sqllogictests/suites/base/03_common/03_0004_select_order_by diff --git a/tests/sqllogictests/suites/base/03_dml/03_0004_select_order_by_db_table_col b/tests/sqllogictests/suites/base/03_common/03_0004_select_order_by_db_table_col similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0004_select_order_by_db_table_col rename to tests/sqllogictests/suites/base/03_common/03_0004_select_order_by_db_table_col diff --git a/tests/sqllogictests/suites/base/03_dml/03_0005_select_empty_projection b/tests/sqllogictests/suites/base/03_common/03_0005_select_empty_projection similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0005_select_empty_projection rename to tests/sqllogictests/suites/base/03_common/03_0005_select_empty_projection diff --git 
a/tests/sqllogictests/suites/base/03_dml/03_0005_select_filter b/tests/sqllogictests/suites/base/03_common/03_0005_select_filter similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0005_select_filter rename to tests/sqllogictests/suites/base/03_common/03_0005_select_filter diff --git a/tests/sqllogictests/suites/base/03_dml/03_0006_select_having b/tests/sqllogictests/suites/base/03_common/03_0006_select_having similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0006_select_having rename to tests/sqllogictests/suites/base/03_common/03_0006_select_having diff --git a/tests/sqllogictests/suites/base/03_dml/03_0007_select_alias b/tests/sqllogictests/suites/base/03_common/03_0007_select_alias similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0007_select_alias rename to tests/sqllogictests/suites/base/03_common/03_0007_select_alias diff --git a/tests/sqllogictests/suites/base/03_dml/03_0008_select_aggregator_combinator b/tests/sqllogictests/suites/base/03_common/03_0008_select_aggregator_combinator similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0008_select_aggregator_combinator rename to tests/sqllogictests/suites/base/03_common/03_0008_select_aggregator_combinator diff --git a/tests/sqllogictests/suites/base/03_dml/03_0009_select_exists b/tests/sqllogictests/suites/base/03_common/03_0009_select_exists similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0009_select_exists rename to tests/sqllogictests/suites/base/03_common/03_0009_select_exists diff --git a/tests/sqllogictests/suites/base/03_dml/03_0011_select_from_system_processes b/tests/sqllogictests/suites/base/03_common/03_0011_select_from_system_processes similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0011_select_from_system_processes rename to tests/sqllogictests/suites/base/03_common/03_0011_select_from_system_processes diff --git a/tests/sqllogictests/suites/base/03_dml/03_0012_select_number_type b/tests/sqllogictests/suites/base/03_common/03_0012_select_number_type similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0012_select_number_type rename to tests/sqllogictests/suites/base/03_common/03_0012_select_number_type diff --git a/tests/sqllogictests/suites/base/03_dml/03_0013_select_udf b/tests/sqllogictests/suites/base/03_common/03_0013_select_udf similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0013_select_udf rename to tests/sqllogictests/suites/base/03_common/03_0013_select_udf diff --git a/tests/sqllogictests/suites/base/03_dml/03_0014_insert_into_select b/tests/sqllogictests/suites/base/03_common/03_0014_insert_into_select similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0014_insert_into_select rename to tests/sqllogictests/suites/base/03_common/03_0014_insert_into_select diff --git a/tests/sqllogictests/suites/base/03_dml/03_0016_insert_into_values b/tests/sqllogictests/suites/base/03_common/03_0016_insert_into_values similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0016_insert_into_values rename to tests/sqllogictests/suites/base/03_common/03_0016_insert_into_values diff --git a/tests/sqllogictests/suites/base/03_dml/03_0017_insert_overwrite b/tests/sqllogictests/suites/base/03_common/03_0017_insert_overwrite similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0017_insert_overwrite rename to 
tests/sqllogictests/suites/base/03_common/03_0017_insert_overwrite diff --git a/tests/sqllogictests/suites/base/03_dml/03_0018_insert_into_variant b/tests/sqllogictests/suites/base/03_common/03_0018_insert_into_variant similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0018_insert_into_variant rename to tests/sqllogictests/suites/base/03_common/03_0018_insert_into_variant diff --git a/tests/sqllogictests/suites/base/03_dml/03_0020_group_by_negative b/tests/sqllogictests/suites/base/03_common/03_0020_group_by_negative similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0020_group_by_negative rename to tests/sqllogictests/suites/base/03_common/03_0020_group_by_negative diff --git a/tests/sqllogictests/suites/base/03_dml/03_0021_select_with_stackoverflow b/tests/sqllogictests/suites/base/03_common/03_0021_select_with_stackoverflow similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0021_select_with_stackoverflow rename to tests/sqllogictests/suites/base/03_common/03_0021_select_with_stackoverflow diff --git a/tests/sqllogictests/suites/base/03_dml/03_0022_select_distinct b/tests/sqllogictests/suites/base/03_common/03_0022_select_distinct similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0022_select_distinct rename to tests/sqllogictests/suites/base/03_common/03_0022_select_distinct diff --git a/tests/sqllogictests/suites/base/03_dml/03_0023_insert_into_array b/tests/sqllogictests/suites/base/03_common/03_0023_insert_into_array similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0023_insert_into_array rename to tests/sqllogictests/suites/base/03_common/03_0023_insert_into_array diff --git a/tests/sqllogictests/suites/base/03_dml/03_0024_select_window_function b/tests/sqllogictests/suites/base/03_common/03_0024_select_window_function similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0024_select_window_function rename to tests/sqllogictests/suites/base/03_common/03_0024_select_window_function diff --git a/tests/sqllogictests/suites/base/03_dml/03_0025_delete_from b/tests/sqllogictests/suites/base/03_common/03_0025_delete_from similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0025_delete_from rename to tests/sqllogictests/suites/base/03_common/03_0025_delete_from diff --git a/tests/sqllogictests/suites/base/03_dml/03_0026_insert_into_tuple b/tests/sqllogictests/suites/base/03_common/03_0026_insert_into_tuple similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0026_insert_into_tuple rename to tests/sqllogictests/suites/base/03_common/03_0026_insert_into_tuple diff --git a/tests/sqllogictests/suites/base/03_dml/03_0027_insert_default b/tests/sqllogictests/suites/base/03_common/03_0027_insert_default similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0027_insert_default rename to tests/sqllogictests/suites/base/03_common/03_0027_insert_default diff --git a/tests/sqllogictests/suites/base/03_dml/03_0028_copy_into_stage b/tests/sqllogictests/suites/base/03_common/03_0028_copy_into_stage similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0028_copy_into_stage rename to tests/sqllogictests/suites/base/03_common/03_0028_copy_into_stage diff --git a/tests/sqllogictests/suites/base/03_dml/03_0029_both_select_where_constant b/tests/sqllogictests/suites/base/03_common/03_0029_both_select_where_constant similarity index 100% rename from 
tests/sqllogictests/suites/base/03_dml/03_0029_both_select_where_constant rename to tests/sqllogictests/suites/base/03_common/03_0029_both_select_where_constant diff --git a/tests/sqllogictests/suites/base/03_dml/03_0030_select_ignore_result b/tests/sqllogictests/suites/base/03_common/03_0030_select_ignore_result similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0030_select_ignore_result rename to tests/sqllogictests/suites/base/03_common/03_0030_select_ignore_result diff --git a/tests/sqllogictests/suites/base/03_dml/03_0031_copy_into_user_stage b/tests/sqllogictests/suites/base/03_common/03_0031_copy_into_user_stage similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0031_copy_into_user_stage rename to tests/sqllogictests/suites/base/03_common/03_0031_copy_into_user_stage diff --git a/tests/sqllogictests/suites/base/03_dml/03_0032_select_null_count b/tests/sqllogictests/suites/base/03_common/03_0032_select_null_count similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0032_select_null_count rename to tests/sqllogictests/suites/base/03_common/03_0032_select_null_count diff --git a/tests/sqllogictests/suites/base/03_dml/03_0033_select_list b/tests/sqllogictests/suites/base/03_common/03_0033_select_list similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0033_select_list rename to tests/sqllogictests/suites/base/03_common/03_0033_select_list diff --git a/tests/sqllogictests/suites/base/03_dml/03_0034_select_exclude_list b/tests/sqllogictests/suites/base/03_common/03_0034_select_exclude_list similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0034_select_exclude_list rename to tests/sqllogictests/suites/base/03_common/03_0034_select_exclude_list diff --git a/tests/sqllogictests/suites/base/03_dml/03_0035_update b/tests/sqllogictests/suites/base/03_common/03_0035_update similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0035_update rename to tests/sqllogictests/suites/base/03_common/03_0035_update diff --git a/tests/sqllogictests/suites/base/03_dml/03_0036_two_level_efficiently_memory_aggregator b/tests/sqllogictests/suites/base/03_common/03_0036_two_level_efficiently_memory_aggregator similarity index 100% rename from tests/sqllogictests/suites/base/03_dml/03_0036_two_level_efficiently_memory_aggregator rename to tests/sqllogictests/suites/base/03_common/03_0036_two_level_efficiently_memory_aggregator diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0019_ddl_create_view b/tests/sqllogictests/suites/base/05_ddl/05_0019_ddl_create_view index 20e8ad3d13c3e..acd983bcb0d61 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0019_ddl_create_view +++ b/tests/sqllogictests/suites/base/05_ddl/05_0019_ddl_create_view @@ -102,3 +102,12 @@ create view tmp_view2(d1, d2) as select * from numbers(3) statement ok drop view if exists tmp_view + +query T +explain syntax create view test as SELECT * FROM 's3://testbucket/admin/data/tuple.parquet'(files => ('tuple.parquet', 'test.parquet'), pattern => '.*.parquet', FILE_FORMAT => 'parquet', aws_key_id => 'minioadmin', aws_secret_key => 'minioadmin', endpoint_url => 'http://127.0.0.1:9900/') +---- +CREATE VIEW test +AS + SELECT * + FROM + 's3://testbucket/admin/data/tuple.parquet' (FILES => ('tuple.parquet','test.parquet'),FILE_FORMAT => 'parquet',PATTERN => '.*.parquet', aws_key_id => 'minioadmin', aws_secret_key => 'minioadmin', endpoint_url => 'http://127.0.0.1:9900/') diff --git 
a/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal b/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal index d7be42ee2956f..9b5ddff02fc17 100644 --- a/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal +++ b/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal @@ -23,11 +23,16 @@ drop table t; statement error 1049 create table t (a Decimal(0, 0)); -query I +query IIII select 1::Decimal(17, 3), 2::Decimal(19, 4), 3::Decimal(20, 5), cast(10 as Decimal(7, 2)); ---- 1.000 2.0000 3.00000 10.00 +query IIII +select 1::Decimal(17, 3)::Float64, 2::Decimal(19, 4)::Float64, 3::Decimal(20, 5)::Float64, cast(10 as Decimal(7, 2))::Float64; +---- +1.0 2.0 3.0 10.0 + ## parser overflow statement error 1005 select 2::Decimal(2000, 3); @@ -164,6 +169,12 @@ SELECT ANY(CAST(2.34 AS DECIMAL(6, 2))) 2.34 +query TTTT +select typeof(a+b), typeof(a-b), typeof(a*b), typeof(a/b) from (select 3::Decimal(13,2) a , 3.1::Decimal(8,3) b); +---- +DECIMAL(15, 3) DECIMAL(15, 3) DECIMAL(21, 5) DECIMAL(16, 2) + + ## compare query IIIII @@ -187,6 +198,10 @@ select a > b, a < b, a = b, a <= b, a >= b from (select 3::Decimal(13,2) a , 2.9 ---- 1 0 0 0 1 +query T +select typeof(a = b) from (select 3::Decimal(13,2) a , 2.9 b); +---- +BOOLEAN ## insert diff --git a/tests/suites/0_stateless/13_tpch/13_0000_prepare.sh b/tests/suites/0_stateless/13_tpch/13_0000_prepare.sh index 99b68ea94d073..8a302048417cd 100755 --- a/tests/suites/0_stateless/13_tpch/13_0000_prepare.sh +++ b/tests/suites/0_stateless/13_tpch/13_0000_prepare.sh @@ -33,7 +33,7 @@ echo "CREATE TABLE IF NOT EXISTS part p_type STRING not null, p_size INTEGER not null, p_container STRING not null, - p_retailprice DOUBLE not null, + p_retailprice DECIMAL(15, 2) not null, p_comment STRING not null )" | $MYSQL_CLIENT_CONNECT @@ -44,7 +44,7 @@ echo "CREATE TABLE IF NOT EXISTS supplier s_address STRING not null, s_nationkey INTEGER not null, s_phone STRING not null, - s_acctbal DOUBLE not null, + s_acctbal DECIMAL(15, 2) not null, s_comment STRING not null )" | $MYSQL_CLIENT_CONNECT @@ -53,7 +53,7 @@ echo "CREATE TABLE IF NOT EXISTS partsupp ps_partkey BIGINT not null, ps_suppkey BIGINT not null, ps_availqty BIGINT not null, - ps_supplycost DOUBLE not null, + ps_supplycost DECIMAL(15, 2) not null, ps_comment STRING not null )" | $MYSQL_CLIENT_CONNECT @@ -64,7 +64,7 @@ echo "CREATE TABLE IF NOT EXISTS customer c_address STRING not null, c_nationkey INTEGER not null, c_phone STRING not null, - c_acctbal DOUBLE not null, + c_acctbal DECIMAL(15, 2) not null, c_mktsegment STRING not null, c_comment STRING not null )" | $MYSQL_CLIENT_CONNECT @@ -74,7 +74,7 @@ echo "CREATE TABLE IF NOT EXISTS orders o_orderkey BIGINT not null, o_custkey BIGINT not null, o_orderstatus STRING not null, - o_totalprice DOUBLE not null, + o_totalprice DECIMAL(15, 2) not null, o_orderdate DATE not null, o_orderpriority STRING not null, o_clerk STRING not null, @@ -88,10 +88,10 @@ echo "CREATE TABLE IF NOT EXISTS lineitem l_partkey BIGINT not null, l_suppkey BIGINT not null, l_linenumber BIGINT not null, - l_quantity DOUBLE not null, - l_extendedprice DOUBLE not null, - l_discount DOUBLE not null, - l_tax DOUBLE not null, + l_quantity DECIMAL(15, 2) not null, + l_extendedprice DECIMAL(15, 2) not null, + l_discount DECIMAL(15, 2) not null, + l_tax DECIMAL(15, 2) not null, l_returnflag STRING not null, l_linestatus STRING not null, l_shipdate DATE not null, diff --git a/tests/suites/0_stateless/13_tpch/13_00014_q14.result 
b/tests/suites/0_stateless/13_tpch/13_00014_q14.result deleted file mode 100644 index 7389d7a5ccfd2..0000000000000 --- a/tests/suites/0_stateless/13_tpch/13_00014_q14.result +++ /dev/null @@ -1 +0,0 @@ -16.28385568900598 diff --git a/tests/suites/0_stateless/13_tpch/13_00015_q15.result b/tests/suites/0_stateless/13_tpch/13_00015_q15.result deleted file mode 100644 index a2fbaff3a929a..0000000000000 --- a/tests/suites/0_stateless/13_tpch/13_00015_q15.result +++ /dev/null @@ -1,4 +0,0 @@ -FRANCE GERMANY 1995 4637235.150099999 -FRANCE GERMANY 1996 5224779.573600002 -GERMANY FRANCE 1995 6232818.7036999995 -GERMANY FRANCE 1996 5557312.1121 diff --git a/tests/suites/0_stateless/13_tpch/13_00017_q17.result b/tests/suites/0_stateless/13_tpch/13_00017_q17.result deleted file mode 100644 index 45a8a32882a80..0000000000000 --- a/tests/suites/0_stateless/13_tpch/13_00017_q17.result +++ /dev/null @@ -1 +0,0 @@ -23512.75285714286 diff --git a/tests/suites/0_stateless/13_tpch/13_00019_q19.result b/tests/suites/0_stateless/13_tpch/13_00019_q19.result deleted file mode 100644 index 7d97cc13c8e5f..0000000000000 --- a/tests/suites/0_stateless/13_tpch/13_00019_q19.result +++ /dev/null @@ -1 +0,0 @@ -350370.4832 diff --git a/tests/suites/0_stateless/13_tpch/13_0001_q1.result b/tests/suites/0_stateless/13_tpch/13_0001_q1.result index 24ad3738ea1c4..7848850a06173 100644 --- a/tests/suites/0_stateless/13_tpch/13_0001_q1.result +++ b/tests/suites/0_stateless/13_tpch/13_0001_q1.result @@ -1,4 +1,4 @@ -A F 3774200.0 5320753880 5054096266.68 5256751331.44 25.5375 36002.1238 0.0501 147790 -N F 95257.0 133737795 127132372.65 132286291.22 25.3006 35521.3269 0.0493 3765 -N O 7459297.0 10512270008 9986238338.38 1.038557837658e10 25.5455 36000.9246 0.05 292000 -R F 3785523.0 5337950526 5071818532.94 5274405503.04 25.5259 35994.0292 0.0499 148301 +A F 3774200.00 5320753880.69 5054096266.6828 5256751331.449234 25.5375 36002.1238 0.0501 147790 +N F 95257.00 133737795.84 127132372.6512 132286291.229445 25.3006 35521.3269 0.0493 3765 +N O 7459297.00 10512270008.90 9986238338.3847 10385578376.585467 25.5455 36000.9246 0.0500 292000 +R F 3785523.00 5337950526.47 5071818532.9420 5274405503.049367 25.5259 35994.0292 0.0499 148301 diff --git a/tests/suites/0_stateless/13_tpch/13_0001_q1.sql b/tests/suites/0_stateless/13_tpch/13_0001_q1.sql index 426110428e588..8e1e5a4d3ef84 100644 --- a/tests/suites/0_stateless/13_tpch/13_0001_q1.sql +++ b/tests/suites/0_stateless/13_tpch/13_0001_q1.sql @@ -2,12 +2,12 @@ select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, - to_int64(sum(l_extendedprice)) as sum_base_price, - truncate(sum(l_extendedprice * (1 - l_discount)),2) as sum_disc_price, - truncate(sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)),2) as sum_charge, - truncate(avg(l_quantity),4) as avg_qty, - truncate(avg(l_extendedprice),4) as avg_price, - truncate(avg(l_discount),4) as avg_disc, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, count(*) as count_order from lineitem diff --git a/tests/suites/0_stateless/13_tpch/13_0002_q2.result b/tests/suites/0_stateless/13_tpch/13_0002_q2.result index b02d01b36d415..8f25c60ff0aba 100644 --- a/tests/suites/0_stateless/13_tpch/13_0002_q2.result +++ b/tests/suites/0_stateless/13_tpch/13_0002_q2.result @@ -3,20 +3,20 @@ 9508.37 Supplier#000000070 FRANCE 17268 
Manufacturer#4 INWNH2w,OOWgNDq0BRCcBwOMQc6PdFDc4 16-821-608-1166 ests sleep quickly express ideas. ironic ideas haggle about the final T 9453.01 Supplier#000000802 ROMANIA 10021 Manufacturer#5 ,6HYXb4uaHITmtMBj4Ak57Pd 29-342-882-6463 gular frets. permanently special multipliers believe blithely alongs 9453.01 Supplier#000000802 ROMANIA 13275 Manufacturer#4 ,6HYXb4uaHITmtMBj4Ak57Pd 29-342-882-6463 gular frets. permanently special multipliers believe blithely alongs -9192.1 Supplier#000000115 UNITED KINGDOM 13325 Manufacturer#1 nJ 2t0f7Ve,wL1,6WzGBJLNBUCKlsV 33-597-248-1220 es across the carefully express accounts boost caref +9192.10 Supplier#000000115 UNITED KINGDOM 13325 Manufacturer#1 nJ 2t0f7Ve,wL1,6WzGBJLNBUCKlsV 33-597-248-1220 es across the carefully express accounts boost caref 9032.15 Supplier#000000959 GERMANY 4958 Manufacturer#4 8grA EHBnwOZhO 17-108-642-3106 nding dependencies nag furiou 8702.02 Supplier#000000333 RUSSIA 11810 Manufacturer#3 MaVf XgwPdkiX4nfJGOis8Uu2zKiIZH 32-508-202-6136 oss the deposits cajole carefully even pinto beans. regular foxes detect alo -8615.5 Supplier#000000812 FRANCE 10551 Manufacturer#2 8qh4tezyScl5bidLAysvutB,,ZI2dn6xP 16-585-724-6633 y quickly regular deposits? quickly pending packages after the caref -8615.5 Supplier#000000812 FRANCE 13811 Manufacturer#4 8qh4tezyScl5bidLAysvutB,,ZI2dn6xP 16-585-724-6633 y quickly regular deposits? quickly pending packages after the caref +8615.50 Supplier#000000812 FRANCE 10551 Manufacturer#2 8qh4tezyScl5bidLAysvutB,,ZI2dn6xP 16-585-724-6633 y quickly regular deposits? quickly pending packages after the caref +8615.50 Supplier#000000812 FRANCE 13811 Manufacturer#4 8qh4tezyScl5bidLAysvutB,,ZI2dn6xP 16-585-724-6633 y quickly regular deposits? quickly pending packages after the caref 8488.53 Supplier#000000367 RUSSIA 6854 Manufacturer#4 E Sv9brQVf43Mzz 32-458-198-9557 ages. carefully final excuses nag finally. carefully ironic deposits abov 8430.52 Supplier#000000646 FRANCE 11384 Manufacturer#3 IUzsmT,2oBgjhWP2TlXTL6IkJH,4h,1SJRt 16-601-220-5489 ites among the always final ideas kindle according to the theodolites. notornis in 8271.39 Supplier#000000146 RUSSIA 4637 Manufacturer#5 rBDNgCr04x0sfdzD5,gFOutCiG2 32-792-619-3155 s cajole quickly special requests. quickly enticing theodolites h 8096.98 Supplier#000000574 RUSSIA 323 Manufacturer#4 2O8 sy9g2mlBOuEjzj0pA2pevk, 32-866-246-8752 ully after the regular requests. slyly final dependencies wake slyly along the busy deposit 7392.78 Supplier#000000170 UNITED KINGDOM 7655 Manufacturer#2 RtsXQ,SunkA XHy9 33-803-340-5398 ake carefully across the quickly -7205.2 Supplier#000000477 GERMANY 10956 Manufacturer#5 VtaNKN5Mqui5yh7j2ldd5waf 17-180-144-7991 excuses wake express deposits. furiously careful asymptotes according to the carefull +7205.20 Supplier#000000477 GERMANY 10956 Manufacturer#5 VtaNKN5Mqui5yh7j2ldd5waf 17-180-144-7991 excuses wake express deposits. furiously careful asymptotes according to the carefull 6820.35 Supplier#000000007 UNITED KINGDOM 13217 Manufacturer#5 s,4TicNGB4uO6PaSqNBUq 33-990-965-2201 s unwind silently furiously regular courts. final requests are deposits. 
requests wake quietly blit -6721.7 Supplier#000000954 FRANCE 4191 Manufacturer#3 P3O5p UFz1QsLmZX 16-537-341-8517 ect blithely blithely final acco -6329.9 Supplier#000000996 GERMANY 10735 Manufacturer#2 Wx4dQwOAwWjfSCGupfrM 17-447-811-3282 ironic forges cajole blithely agai +6721.70 Supplier#000000954 FRANCE 4191 Manufacturer#3 P3O5p UFz1QsLmZX 16-537-341-8517 ect blithely blithely final acco +6329.90 Supplier#000000996 GERMANY 10735 Manufacturer#2 Wx4dQwOAwWjfSCGupfrM 17-447-811-3282 ironic forges cajole blithely agai 6173.87 Supplier#000000408 RUSSIA 18139 Manufacturer#1 qcor1u,vJXAokjnL5,dilyYNmh 32-858-724-2950 blithely pending packages cajole furiously slyly pending notornis. slyly final 5364.99 Supplier#000000785 RUSSIA 13784 Manufacturer#4 W VkHBpQyD3qjQjWGpWicOpmILFehmEdWy67kUGY 32-297-653-2203 packages boost carefully. express ideas along 5069.27 Supplier#000000328 GERMANY 16327 Manufacturer#1 SMm24d WG62 17-231-513-5721 he unusual ideas. slyly final packages a diff --git a/tests/suites/0_stateless/13_tpch/13_0003_q3.result b/tests/suites/0_stateless/13_tpch/13_0003_q3.result index 97abf8730421a..2eb3ff0d901e5 100644 --- a/tests/suites/0_stateless/13_tpch/13_0003_q3.result +++ b/tests/suites/0_stateless/13_tpch/13_0003_q3.result @@ -1,5 +1,5 @@ 223140 355369.0698 1995-03-14 0 584291 354494.7318 1995-02-21 0 405063 353125.4577 1995-03-03 0 -573861 351238.27699999994 1995-03-09 0 +573861 351238.2770 1995-03-09 0 554757 349181.7426 1995-03-14 0 diff --git a/tests/suites/0_stateless/13_tpch/13_0005_q5.result b/tests/suites/0_stateless/13_tpch/13_0005_q5.result index c2c4345941f8d..1c1c2aa216c4b 100644 --- a/tests/suites/0_stateless/13_tpch/13_0005_q5.result +++ b/tests/suites/0_stateless/13_tpch/13_0005_q5.result @@ -1,5 +1,5 @@ -CHINA 782211.0 -INDIA 637613.0 -JAPAN 600008.0 -INDONESIA 558048.0 -VIETNAM 449785.0 +CHINA 782210.3000 +INDIA 637612.1508 +JAPAN 600007.7218 +INDONESIA 558047.5402 +VIETNAM 449784.0546 diff --git a/tests/suites/0_stateless/13_tpch/13_0005_q5.sql b/tests/suites/0_stateless/13_tpch/13_0005_q5.sql index 59761d88b1d97..9a4093c63efce 100644 --- a/tests/suites/0_stateless/13_tpch/13_0005_q5.sql +++ b/tests/suites/0_stateless/13_tpch/13_0005_q5.sql @@ -1,7 +1,7 @@ select n_name, - ceil(sum(l_extendedprice * (1 - l_discount)) / 10) as revenue + sum(l_extendedprice * (1 - l_discount)) / 10 as revenue from customer, orders, diff --git a/tests/suites/0_stateless/13_tpch/13_0006_q6.result b/tests/suites/0_stateless/13_tpch/13_0006_q6.result index 367d5c36d0401..a1eae279bd2dc 100644 --- a/tests/suites/0_stateless/13_tpch/13_0006_q6.result +++ b/tests/suites/0_stateless/13_tpch/13_0006_q6.result @@ -1 +1 @@ -11803420.253 +11803420.2534 diff --git a/tests/suites/0_stateless/13_tpch/13_0006_q6.sql b/tests/suites/0_stateless/13_tpch/13_0006_q6.sql index 48c109d72a373..2e41f82c0365e 100644 --- a/tests/suites/0_stateless/13_tpch/13_0006_q6.sql +++ b/tests/suites/0_stateless/13_tpch/13_0006_q6.sql @@ -1,5 +1,5 @@ select - truncate(sum(l_extendedprice * l_discount),3) as revenue + sum(l_extendedprice * l_discount) as revenue from lineitem where diff --git a/tests/suites/0_stateless/13_tpch/13_0007_q7.result b/tests/suites/0_stateless/13_tpch/13_0007_q7.result index 001847159df08..f4f1549735147 100644 --- a/tests/suites/0_stateless/13_tpch/13_0007_q7.result +++ b/tests/suites/0_stateless/13_tpch/13_0007_q7.result @@ -1,4 +1,4 @@ -FRANCE GERMANY 1995 4637235.15 -FRANCE GERMANY 1996 5224779.573 -GERMANY FRANCE 1995 6232818.703 -GERMANY FRANCE 1996 5557312.112 +FRANCE GERMANY 1995 
4637235.1501 +FRANCE GERMANY 1996 5224779.5736 +GERMANY FRANCE 1995 6232818.7037 +GERMANY FRANCE 1996 5557312.1121 diff --git a/tests/suites/0_stateless/13_tpch/13_0007_q7.sql b/tests/suites/0_stateless/13_tpch/13_0007_q7.sql index 93dcc60d0ce84..75de6efee7886 100644 --- a/tests/suites/0_stateless/13_tpch/13_0007_q7.sql +++ b/tests/suites/0_stateless/13_tpch/13_0007_q7.sql @@ -3,7 +3,7 @@ select supp_nation, cust_nation, l_year, - truncate(sum(volume),3) as revenue + sum(volume) as revenue from ( select diff --git a/tests/suites/0_stateless/13_tpch/13_0008_q8.result b/tests/suites/0_stateless/13_tpch/13_0008_q8.result index 758a3e060acdd..93a951430151c 100644 --- a/tests/suites/0_stateless/13_tpch/13_0008_q8.result +++ b/tests/suites/0_stateless/13_tpch/13_0008_q8.result @@ -1,2 +1,2 @@ -1995 0.02864874 -1996 0.01825027 +1995 0.0286 +1996 0.0182 diff --git a/tests/suites/0_stateless/13_tpch/13_0008_q8.sql b/tests/suites/0_stateless/13_tpch/13_0008_q8.sql index 46db4157fbfcf..49da90840ab65 100644 --- a/tests/suites/0_stateless/13_tpch/13_0008_q8.sql +++ b/tests/suites/0_stateless/13_tpch/13_0008_q8.sql @@ -1,10 +1,10 @@ select o_year, - truncate(sum(case + sum(case when nation = 'BRAZIL' then volume else 0 - end) / sum(volume),8) as mkt_share + end) / sum(volume) as mkt_share from ( select diff --git a/tests/suites/0_stateless/13_tpch/13_0009_q9.result b/tests/suites/0_stateless/13_tpch/13_0009_q9.result index 17e00e229aa53..bee6c9e4d1a22 100644 --- a/tests/suites/0_stateless/13_tpch/13_0009_q9.result +++ b/tests/suites/0_stateless/13_tpch/13_0009_q9.result @@ -1,5 +1,175 @@ -MOZAMBIQUE 1998 162042.0 -JORDAN 1998 181148.0 -MOROCCO 1998 181533.0 -JAPAN 1998 184953.0 -VIETNAM 1998 192431.0 +ALGERIA 1998 2321785.3682 +ALGERIA 1997 3685016.8589 +ALGERIA 1996 4276597.4253 +ALGERIA 1995 4418370.4154 +ALGERIA 1994 3864849.9521 +ALGERIA 1993 3541051.3865 +ALGERIA 1992 4310013.3482 +ARGENTINA 1998 2685983.8005 +ARGENTINA 1997 4242147.8124 +ARGENTINA 1996 3907867.0103 +ARGENTINA 1995 4605921.5011 +ARGENTINA 1994 3542096.1564 +ARGENTINA 1993 3949965.9388 +ARGENTINA 1992 4521180.4695 +BRAZIL 1998 2778730.3931 +BRAZIL 1997 4642037.4687 +BRAZIL 1996 4530304.6034 +BRAZIL 1995 4502344.8657 +BRAZIL 1994 4875806.5015 +BRAZIL 1993 4687478.6531 +BRAZIL 1992 5035200.0464 +CANADA 1998 2194509.0465 +CANADA 1997 3482197.9521 +CANADA 1996 3712231.2814 +CANADA 1995 4014814.8476 +CANADA 1994 4145304.4855 +CANADA 1993 3787069.6045 +CANADA 1992 4168009.4201 +CHINA 1998 3398578.0001 +CHINA 1997 6358959.3338 +CHINA 1996 6435158.3229 +CHINA 1995 6174776.2113 +CHINA 1994 6385751.0812 +CHINA 1993 5765034.1194 +CHINA 1992 6324034.2379 +EGYPT 1998 2333148.3334 +EGYPT 1997 3661244.2731 +EGYPT 1996 3765371.2368 +EGYPT 1995 4094744.2925 +EGYPT 1994 3566508.0818 +EGYPT 1993 3725283.7747 +EGYPT 1992 3373762.3335 +ETHIOPIA 1998 1953927.2682 +ETHIOPIA 1997 3285786.3266 +ETHIOPIA 1996 3525028.7952 +ETHIOPIA 1995 3781674.8911 +ETHIOPIA 1994 3037409.4360 +ETHIOPIA 1993 3008978.2677 +ETHIOPIA 1992 2721203.2355 +FRANCE 1998 2604373.8805 +FRANCE 1997 3982872.0488 +FRANCE 1996 3622479.2413 +FRANCE 1995 4479939.7020 +FRANCE 1994 3531013.1981 +FRANCE 1993 4086437.3102 +FRANCE 1992 3637792.1333 +GERMANY 1998 3291023.2965 +GERMANY 1997 5139337.3443 +GERMANY 1996 4799810.4577 +GERMANY 1995 5405785.7978 +GERMANY 1994 4555556.4592 +GERMANY 1993 4428195.1019 +GERMANY 1992 4656148.4204 +INDIA 1998 2591288.1874 +INDIA 1997 5159562.7033 +INDIA 1996 5307258.3049 +INDIA 1995 5148208.7902 +INDIA 1994 5164001.9582 +INDIA 1993 4321398.4388 +INDIA 1992 
5297703.6935 +INDONESIA 1998 3094900.1597 +INDONESIA 1997 5719773.0358 +INDONESIA 1996 6037238.5993 +INDONESIA 1995 5266783.4899 +INDONESIA 1994 5470762.8729 +INDONESIA 1993 6189826.6613 +INDONESIA 1992 4414623.1549 +IRAN 1998 3214864.1209 +IRAN 1997 3688049.0691 +IRAN 1996 3621649.2247 +IRAN 1995 4420783.4205 +IRAN 1994 4373984.6523 +IRAN 1993 3731301.7814 +IRAN 1992 4417133.3662 +IRAQ 1998 2338859.4099 +IRAQ 1997 3622681.5643 +IRAQ 1996 4762291.8722 +IRAQ 1995 4558092.7359 +IRAQ 1994 4951604.1699 +IRAQ 1993 3830077.9911 +IRAQ 1992 3938636.4874 +JAPAN 1998 1849535.0802 +JAPAN 1997 4068688.8537 +JAPAN 1996 4044774.7597 +JAPAN 1995 4793005.8027 +JAPAN 1994 4114717.0568 +JAPAN 1993 3614468.7485 +JAPAN 1992 4266694.4700 +JORDAN 1998 1811488.0719 +JORDAN 1997 2951297.8678 +JORDAN 1996 3302528.3067 +JORDAN 1995 3221813.9990 +JORDAN 1994 2417892.0921 +JORDAN 1993 3107641.7661 +JORDAN 1992 3316379.0585 +KENYA 1998 2579075.4190 +KENYA 1997 2929194.2317 +KENYA 1996 3569129.5619 +KENYA 1995 3542889.1087 +KENYA 1994 3983095.3994 +KENYA 1993 3713988.9708 +KENYA 1992 3304641.8340 +MOROCCO 1998 1815334.8180 +MOROCCO 1997 3693214.8447 +MOROCCO 1996 4116175.9230 +MOROCCO 1995 3515127.1402 +MOROCCO 1994 4003072.1120 +MOROCCO 1993 3599199.6679 +MOROCCO 1992 3958335.4224 +MOZAMBIQUE 1998 1620428.7346 +MOZAMBIQUE 1997 2802166.6473 +MOZAMBIQUE 1996 2409955.1755 +MOZAMBIQUE 1995 2771602.6274 +MOZAMBIQUE 1994 2548226.2158 +MOZAMBIQUE 1993 2843748.9053 +MOZAMBIQUE 1992 2556501.0943 +PERU 1998 2036430.3602 +PERU 1997 4064142.4091 +PERU 1996 4068678.5671 +PERU 1995 4657694.8412 +PERU 1994 4731959.4655 +PERU 1993 4144006.6610 +PERU 1992 3754635.0078 +ROMANIA 1998 1992773.6811 +ROMANIA 1997 2854639.8680 +ROMANIA 1996 3139337.3029 +ROMANIA 1995 3222153.3776 +ROMANIA 1994 3222844.3190 +ROMANIA 1993 3488994.0288 +ROMANIA 1992 3029274.4420 +RUSSIA 1998 2339865.6635 +RUSSIA 1997 4153619.5424 +RUSSIA 1996 3772067.4041 +RUSSIA 1995 4704988.8607 +RUSSIA 1994 4479082.8694 +RUSSIA 1993 4767719.9791 +RUSSIA 1992 4533465.5590 +SAUDI ARABIA 1998 3386948.9564 +SAUDI ARABIA 1997 5425980.3373 +SAUDI ARABIA 1996 5227607.1677 +SAUDI ARABIA 1995 4506731.6411 +SAUDI ARABIA 1994 4698658.7425 +SAUDI ARABIA 1993 5493626.5285 +SAUDI ARABIA 1992 4573560.0150 +UNITED KINGDOM 1998 2252021.5137 +UNITED KINGDOM 1997 4343926.8026 +UNITED KINGDOM 1996 4189476.3065 +UNITED KINGDOM 1995 4469569.8829 +UNITED KINGDOM 1994 4410094.6264 +UNITED KINGDOM 1993 4054677.1050 +UNITED KINGDOM 1992 3978688.8831 +UNITED STATES 1998 2238771.5581 +UNITED STATES 1997 4135581.5734 +UNITED STATES 1996 3624013.2660 +UNITED STATES 1995 3892244.5172 +UNITED STATES 1994 3289224.1138 +UNITED STATES 1993 3626170.2028 +UNITED STATES 1992 3993973.4997 +VIETNAM 1998 1924313.4862 +VIETNAM 1997 3436195.3709 +VIETNAM 1996 4017288.8927 +VIETNAM 1995 3644054.1372 +VIETNAM 1994 4141277.6665 +VIETNAM 1993 2556114.1693 +VIETNAM 1992 4090524.4905 diff --git a/tests/suites/0_stateless/13_tpch/13_0009_q9.sql b/tests/suites/0_stateless/13_tpch/13_0009_q9.sql index 9870dd4ef8691..9a9b5021c3faf 100644 --- a/tests/suites/0_stateless/13_tpch/13_0009_q9.sql +++ b/tests/suites/0_stateless/13_tpch/13_0009_q9.sql @@ -1,33 +1,26 @@ - select nation, o_year, - truncate(truncate(sum(amount),0)/10, 0) as sum_profit + sum(amount) as sum_profit from ( select n_name as nation, extract(year from o_orderdate) as o_year, - truncate(l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity, 100) as amount + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount from - part, - 
supplier, - lineitem, - partsupp, - orders, - nation + lineitem join orders on o_orderkey = l_orderkey + join part on p_partkey = l_partkey + join partsupp on ps_partkey = l_partkey + join supplier on s_suppkey = l_suppkey + join nation on s_nationkey = n_nationkey where - s_suppkey = l_suppkey - and ps_suppkey = l_suppkey - and ps_partkey = l_partkey - and p_partkey = l_partkey - and o_orderkey = l_orderkey - and s_nationkey = n_nationkey - and p_name like '%green%' + ps_suppkey = l_suppkey and + p_name like '%green%' ) as profit group by nation, o_year order by - sum_profit -limit 5; + nation, + o_year desc; \ No newline at end of file diff --git a/tests/suites/0_stateless/13_tpch/13_0010_q10.result b/tests/suites/0_stateless/13_tpch/13_0010_q10.result index 8fae3822f8983..0c3ee7bc09adc 100644 --- a/tests/suites/0_stateless/13_tpch/13_0010_q10.result +++ b/tests/suites/0_stateless/13_tpch/13_0010_q10.result @@ -1,5 +1,5 @@ -8242 Customer#000008242 622786.729 6322.09 ETHIOPIA P2n4nJhy,UqSo2s43YfSvYJDZ6lk 15-792-676-1184 slyly regular packages haggle carefully ironic ideas. courts are furiously. furiously unusual theodolites cajole. i -7714 Customer#000007714 557400.305 9799.98 IRAN SnnIGB,SkmnWpX3 20-922-418-6024 arhorses according to the blithely express re -11032 Customer#000011032 512500.964 8496.93 UNITED KINGDOM WIKHC7K3Cn7156iNOyfVG3cZ7YqkgsR,Ly 33-102-772-3533 posits-- furiously ironic accounts are again -2455 Customer#000002455 481592.405 2070.99 GERMANY RVn1ZSRtLqPlJLIZxvpmsbgC02 17-946-225-9977 al asymptotes. finally ironic accounts cajole furiously. permanently unusual theodolites aro -12106 Customer#000012106 479414.213 5342.11 UNITED STATES wth3twOmu6vy 34-905-346-4472 ly after the blithely regular foxes. accounts haggle carefully alongside of the blithely even ideas. +8242 Customer#000008242 622786.7297 6322.09 ETHIOPIA P2n4nJhy,UqSo2s43YfSvYJDZ6lk 15-792-676-1184 slyly regular packages haggle carefully ironic ideas. courts are furiously. furiously unusual theodolites cajole. i +7714 Customer#000007714 557400.3053 9799.98 IRAN SnnIGB,SkmnWpX3 20-922-418-6024 arhorses according to the blithely express re +11032 Customer#000011032 512500.9641 8496.93 UNITED KINGDOM WIKHC7K3Cn7156iNOyfVG3cZ7YqkgsR,Ly 33-102-772-3533 posits-- furiously ironic accounts are again +2455 Customer#000002455 481592.4053 2070.99 GERMANY RVn1ZSRtLqPlJLIZxvpmsbgC02 17-946-225-9977 al asymptotes. finally ironic accounts cajole furiously. permanently unusual theodolites aro +12106 Customer#000012106 479414.2133 5342.11 UNITED STATES wth3twOmu6vy 34-905-346-4472 ly after the blithely regular foxes. accounts haggle carefully alongside of the blithely even ideas. 
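Across these TPC-H test updates the pattern is consistent: rounding wrappers such as truncate(), ceil(), and to_int64() are dropped from the queries, and the expected results are regenerated with the aggregate's native decimal scale. A minimal sketch of the before/after shape, using the q6-style query already shown above (the real query's date and discount filters are omitted here for brevity):

```sql
-- Old form: the aggregate was wrapped in a rounding helper so floating-point
-- results stayed stable across runs, e.g.
--   truncate(sum(l_extendedprice * l_discount), 3) as revenue
--
-- New form: the raw aggregate is kept, and the expected result carries the
-- full decimal scale (e.g. 11803420.2534 instead of 11803420.253).
select
    sum(l_extendedprice * l_discount) as revenue
from
    lineitem;
```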
diff --git a/tests/suites/0_stateless/13_tpch/13_0010_q10.sql b/tests/suites/0_stateless/13_tpch/13_0010_q10.sql index 39957a7500989..b488450a089e6 100644 --- a/tests/suites/0_stateless/13_tpch/13_0010_q10.sql +++ b/tests/suites/0_stateless/13_tpch/13_0010_q10.sql @@ -2,7 +2,7 @@ select c_custkey, c_name, - TRUNCATE(sum(l_extendedprice * (1 - l_discount)), 3) as revenue, + sum(l_extendedprice * (1 - l_discount)) as revenue, c_acctbal, n_name, c_address, diff --git a/tests/suites/0_stateless/13_tpch/13_0013_q13.result b/tests/suites/0_stateless/13_tpch/13_0013_q13.result index 1dc47a8a74ca0..e0964d49fde3d 100644 --- a/tests/suites/0_stateless/13_tpch/13_0013_q13.result +++ b/tests/suites/0_stateless/13_tpch/13_0013_q13.result @@ -35,40 +35,3 @@ 35 5 1 3 36 1 -0 5000 -10 676 -9 651 -11 618 -12 554 -8 548 -13 514 -7 487 -19 485 -18 461 -14 454 -20 444 -16 442 -17 438 -15 430 -21 396 -22 378 -6 355 -23 322 -24 262 -25 188 -5 184 -26 162 -27 138 -28 103 -4 92 -29 59 -3 49 -30 29 -31 26 -32 19 -2 12 -33 8 -34 7 -35 5 -1 3 -36 1 diff --git a/tests/suites/0_stateless/13_tpch/13_0013_q13.sql b/tests/suites/0_stateless/13_tpch/13_0013_q13.sql index 829cfd6e998b1..86b1a12ad21ab 100644 --- a/tests/suites/0_stateless/13_tpch/13_0013_q13.sql +++ b/tests/suites/0_stateless/13_tpch/13_0013_q13.sql @@ -1,4 +1,3 @@ --- The sql will be converted to right join by join reorder select c_count, count(*) as custdist @@ -22,33 +21,3 @@ group by order by custdist desc, c_count desc; - --- Disable cbo, the the sql will be left join - -set enable_cbo = 0; - -select - c_count, - count(*) as custdist -from - ( - select - c_custkey, - count(o_orderkey) as c_count - from - customer - left outer join - orders - on c_custkey = o_custkey - and o_comment not like '%pending%deposits%' - group by - c_custkey - ) - c_orders -group by - c_count -order by - custdist desc, - c_count desc; - -set enable_cbo = 1; diff --git a/tests/suites/0_stateless/13_tpch/13_0014_q14.result b/tests/suites/0_stateless/13_tpch/13_0014_q14.result index 94cc99434bebb..86cce6a01fc42 100644 --- a/tests/suites/0_stateless/13_tpch/13_0014_q14.result +++ b/tests/suites/0_stateless/13_tpch/13_0014_q14.result @@ -1 +1 @@ -16.28385 +16.2838 diff --git a/tests/suites/0_stateless/13_tpch/13_0014_q14.sql b/tests/suites/0_stateless/13_tpch/13_0014_q14.sql index 36048acbbef59..b6f75bfe43ed4 100644 --- a/tests/suites/0_stateless/13_tpch/13_0014_q14.sql +++ b/tests/suites/0_stateless/13_tpch/13_0014_q14.sql @@ -1,10 +1,10 @@ select - TRUNCATE(100.00 * sum(case + 100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice * (1 - l_discount) else 0 - end) / sum(l_extendedprice * (1 - l_discount)), 5) as promo_revenue + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue from lineitem, part diff --git a/tests/suites/0_stateless/13_tpch/13_0015_q15.result b/tests/suites/0_stateless/13_tpch/13_0015_q15.result index 18bd767ab1b64..1551e0522e742 100644 --- a/tests/suites/0_stateless/13_tpch/13_0015_q15.result +++ b/tests/suites/0_stateless/13_tpch/13_0015_q15.result @@ -1 +1 @@ -677 Supplier#000000677 8mhrffG7D2WJBSQbOGstQ 23-290-639-3315 1614410.29 +677 Supplier#000000677 8mhrffG7D2WJBSQbOGstQ 23-290-639-3315 1614410.2928 diff --git a/tests/suites/0_stateless/13_tpch/13_0015_q15.sql b/tests/suites/0_stateless/13_tpch/13_0015_q15.sql index 7a76dd2f2c50e..e235dda47c281 100644 --- a/tests/suites/0_stateless/13_tpch/13_0015_q15.sql +++ b/tests/suites/0_stateless/13_tpch/13_0015_q15.sql @@ -1,7 +1,7 @@ with revenue as ( select l_suppkey as supplier_no, - 
truncate(sum(l_extendedprice * (1 - l_discount)), 2) as total_revenue + sum(l_extendedprice * (1 - l_discount)) as total_revenue from lineitem where diff --git a/tests/suites/0_stateless/13_tpch/13_0017_q17.result b/tests/suites/0_stateless/13_tpch/13_0017_q17.result index 0a817a882ee99..576108bdbb53c 100644 --- a/tests/suites/0_stateless/13_tpch/13_0017_q17.result +++ b/tests/suites/0_stateless/13_tpch/13_0017_q17.result @@ -1 +1 @@ -23512.75285714 +23512.75 diff --git a/tests/suites/0_stateless/13_tpch/13_0017_q17.sql b/tests/suites/0_stateless/13_tpch/13_0017_q17.sql index f025b7b3d3788..03283194d6e75 100644 --- a/tests/suites/0_stateless/13_tpch/13_0017_q17.sql +++ b/tests/suites/0_stateless/13_tpch/13_0017_q17.sql @@ -1,6 +1,6 @@ select - truncate(sum(l_extendedprice) / 7.0,8) as avg_yearly + sum(l_extendedprice) / 7.0 as avg_yearly from lineitem, part diff --git a/tests/suites/0_stateless/13_tpch/13_0018_q18.result b/tests/suites/0_stateless/13_tpch/13_0018_q18.result index 1e3ae71e3cf5e..6150df3557897 100644 --- a/tests/suites/0_stateless/13_tpch/13_0018_q18.result +++ b/tests/suites/0_stateless/13_tpch/13_0018_q18.result @@ -1,5 +1,5 @@ -Customer#000001639 1639 502886 1994-04-12 456423.88 312.0 -Customer#000006655 6655 29158 1995-10-21 452805.02 305.0 -Customer#000014110 14110 565574 1995-09-24 425099.85 301.0 -Customer#000001775 1775 6882 1997-04-09 408368.1 303.0 -Customer#000011459 11459 551136 1993-05-19 386812.74 308.0 +Customer#000001639 1639 502886 1994-04-12 456423.88 312.00 +Customer#000006655 6655 29158 1995-10-21 452805.02 305.00 +Customer#000014110 14110 565574 1995-09-24 425099.85 301.00 +Customer#000001775 1775 6882 1997-04-09 408368.10 303.00 +Customer#000011459 11459 551136 1993-05-19 386812.74 308.00 diff --git a/tests/suites/0_stateless/13_tpch/13_0019_q19.result b/tests/suites/0_stateless/13_tpch/13_0019_q19.result index e8729c807f3c6..7d97cc13c8e5f 100644 --- a/tests/suites/0_stateless/13_tpch/13_0019_q19.result +++ b/tests/suites/0_stateless/13_tpch/13_0019_q19.result @@ -1 +1 @@ -350370.483 +350370.4832 diff --git a/tests/suites/0_stateless/13_tpch/13_0019_q19.sql b/tests/suites/0_stateless/13_tpch/13_0019_q19.sql index a622ede015f35..1c366fff2ec61 100644 --- a/tests/suites/0_stateless/13_tpch/13_0019_q19.sql +++ b/tests/suites/0_stateless/13_tpch/13_0019_q19.sql @@ -1,6 +1,6 @@ select - truncate(sum(l_extendedprice* (1 - l_discount)),3) as revenue + sum(l_extendedprice* (1 - l_discount)) as revenue from lineitem, part diff --git a/tests/suites/0_stateless/13_tpch/13_0022_q22.result b/tests/suites/0_stateless/13_tpch/13_0022_q22.result index 35d96d2ee2d93..916a875cb3355 100644 --- a/tests/suites/0_stateless/13_tpch/13_0022_q22.result +++ b/tests/suites/0_stateless/13_tpch/13_0022_q22.result @@ -1,7 +1,7 @@ -13 94 714035 -17 96 722560 -18 99 738012 -23 93 708285 -29 85 632693 -30 87 646748 -31 87 647372 +13 94 714035.05 +17 96 722560.15 +18 99 738012.52 +23 93 708285.25 +29 85 632693.46 +30 87 646748.02 +31 87 647372.50 diff --git a/tests/suites/0_stateless/13_tpch/13_0022_q22.sql b/tests/suites/0_stateless/13_tpch/13_0022_q22.sql index d04b6f51c091c..b1405340dc20c 100644 --- a/tests/suites/0_stateless/13_tpch/13_0022_q22.sql +++ b/tests/suites/0_stateless/13_tpch/13_0022_q22.sql @@ -1,7 +1,7 @@ select cntrycode, count(*) as numcust, - to_int64(sum(c_acctbal)) as totacctbal + sum(c_acctbal) as totacctbal from ( select diff --git a/website/blog/2023-02-24-Deploy-databend-on-minio.md b/website/blog/2023-02-24-Deploy-databend-on-minio.md index 
37da38b4be33f..2bcc4c802f74c 100644 --- a/website/blog/2023-02-24-Deploy-databend-on-minio.md +++ b/website/blog/2023-02-24-Deploy-databend-on-minio.md @@ -1,6 +1,6 @@ --- -title: Deploying a single instance of Databend using on MinIO -description: Using Databend to facilitate data analysis on MinIO +title: Deploying a single instance of Databend on MinIO +description: Using Databend to facilitate data analysis with MinIO slug: deploying-databend-using-on-minio date: 2023-02-24 tags: [beginner] @@ -15,11 +15,11 @@ authors: ![Alt text](../static/img/blog/databend-minio-beginner-01.png) -In this article, we will discuss how to deploy a single instance of Databend using MinIO for facilitating data analysis. MinIO is an object storage solution that is lightweight and easy to operate. Databend is a modern data warehouse designed for cloud architecture, built with Rust and open-source. It provides rapid elastic scaling and aims to create an on-demand, pay-as-you-go data cloud product experience. +This blog walks you through the process of deploying a single instance of Databend on MinIO. MinIO is an object storage solution that is lightweight and easy to operate. Databend is a modern data warehouse designed for cloud architecture, built with Rust and open-source. It provides rapid elastic scaling and aims to create an on-demand, pay-as-you-go data cloud product experience. Open-Source Repo: https://github.com/datafuselabs/databend/ -Databend Docs: https://databend.rs +Databend Docs: https://databend.rs/doc ## Databend Architecture @@ -40,7 +40,7 @@ This layer interacts directly with users and their storage. They interact with D The Databend storage layer is the Databend Fuse Engine and supports for cloud and self-built object storage. Databend uses the Parquet format to store data block with min/max indexing, sparse indexing, bloom indexing, etc. -### Databend Supported Deployment Platforms +### Supported Deployment Platforms | Platform | Supported| | ----------- | -------- | @@ -67,13 +67,11 @@ The following uses the MinIO + Databend standalone deployment in Linux of x64 as | Software | Path | Port | | -------- | -------------- | --------------------------------------------------- | | minio | /data/minio | 9900 | -| databend | /data/databend | mysql: 3307
http: 8000
Clickhouse http: 8124 | +| databend | /data/databend | mysql: 3307
http: 8000
ClickHouse http: 8124 | -### Minio Deployment +### Deploy MinIO -MinIO Homepage: https://min.io/ - -Download MinIO from the official website and start it by running the following commands: +Download [MinIO](https://min.io/) from the official website and start it by running the following commands: ```Bash cd /data @@ -121,15 +119,16 @@ sudo chown -R $USER /var/log/databend ![ ](../static/img/blog/databend-minio-beginner-01-3.png) The Databend installation files are extracted to a directory named "databend". + ### Configure Databend -Once you have installed Databend, you'll need to configure it. The default configuration file for databend-query is included with the download. You can modify this file as follows: +You need to configure Databend after the installation. The default configuration file for databend-query is included with the download. You can modify this file as follows: ```Bash vim configs/databend-query.toml ``` -change: +Updates: ```Bash # Storage config.[storage] @@ -151,12 +150,12 @@ secret_access_key = "minioadmin" To start Databend, run the following command: -```Bash +```bash ./script/start.sh ``` To check that Databend is running, you can use the following command: -``` +```bash ps axu |grep databend ``` @@ -164,16 +163,16 @@ ps axu |grep databend To stop Databend, run the following command: -```Bash +```bash ./script/stop.sh ``` -#### Connect to Databend +#### Connect to Databend -Databend has three external service ports by default: +Databend has three external service ports by default: - MySQL: 3307 supports MySQL cli and application connection. -- Clickhouse: 8124 Clickhouse http handler protocol +- ClickHouse: 8124 ClickHouse http handler protocol - [http prot: 8000](https://databend.rs/doc/integrations/api/rest) The Databend HTTP handler is a REST API To connect to Databend using the MySQL client, run the following command: @@ -184,17 +183,17 @@ mysql -h 127.0.0.1 -P3307 -uroot >Note that the root user can log in without a password from localhost. Databend permission management refers to the design of MySQL 8.0 and allows you to manage Databend users in the same way as MySQL 8.0 user management. -Clickhouse protocol using: https://databend.rs/doc/reference/api/clickhouse-handler +ClickHouse protocol using: https://databend.rs/doc/reference/api/clickhouse-handler You're all set up now. Use Databend as you're with MySQL. 
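If you want to confirm that the MinIO-backed storage is wired up end to end, a quick smoke test over the MySQL port is enough. The table name below is only an example, not part of the deployment steps:

```sql
-- Create a throwaway table, write a couple of rows, and read them back.
CREATE TABLE smoke_test (id INT, note VARCHAR);
INSERT INTO smoke_test VALUES (1, 'hello'), (2, 'minio');
SELECT COUNT(*), MAX(id) FROM smoke_test;
-- Data blocks are written as Parquet objects into the MinIO bucket configured
-- in databend-query.toml above, so a successful query confirms the storage path.
DROP TABLE smoke_test;
```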
## Other Resources -- load data to Databend: https://databend.rs/doc/load-data/ +- Loading data to Databend: https://databend.rs/doc/load-data/ - Databend k8s opterator: https://github.com/datafuselabs/helm-charts -- bendsql: https://github.com/databendcloud/bendsql -- Databend driver: +- bendsql: https://github.com/databendcloud/bendsql +- Databend drivers: - Java Driver: https://github.com/databendcloud/databend-jdbc - - Go Driver: https://github.com/databendcloud/databend-go + - Go Driver: https://github.com/databendcloud/databend-go - Python Driver: https://github.com/databendcloud/databend-py ## Connect With Us diff --git a/website/src/css/custom.scss b/website/src/css/custom.scss index b80a933787f5f..32067c1e86a6f 100644 --- a/website/src/css/custom.scss +++ b/website/src/css/custom.scss @@ -264,10 +264,9 @@ --image-bg: url(data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNjAiIGhlaWdodD0iNjAiIHZpZXdCb3g9IjAgMCA2MCA2MCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPGNpcmNsZSBjeD0iMzAiIGN5PSIzMCIgcj0iMSIgZmlsbD0id2hpdGUiIGZpbGwtb3BhY2l0eT0iMC4xMiIvPgo8L3N2Zz4K); } -body{ +html,body{ background-color: var(--color-bg-0)!important; - background-image:var(--image-bg); - background-repeat: repeat; + height: -webkit-fill-available; } a:hover{ text-decoration: none; diff --git a/website/src/pages/index.module.scss b/website/src/pages/index.module.scss index 91422e708368e..abe5fc874fe7d 100644 --- a/website/src/pages/index.module.scss +++ b/website/src/pages/index.module.scss @@ -1,156 +1,161 @@ -.heroContainer{ - display: flex; - padding-top: 60px; - gap: 32px; - flex-direction: column; - align-items: center; - hr{ - width: 100%; - max-width: 720px; - } - .title{ - font-family: 'Poppins'; - font-style: normal; - font-weight: 800; - font-size: 60px; - line-height: 84px; - text-align: center; - color: var(--color-text-0); - span{ - background: linear-gradient(94.99deg, #EF708E -2.92%, #C9357C 34.95%, #5200FF 81.04%), #010E29; - -webkit-background-clip: text; - -webkit-text-fill-color: transparent; - background-clip: text; - text-fill-color: transparent; - } - @include media{ - font-size: 48px; - } - } - .subtitle{ - font-family: 'Poppins'; - font-style: normal; - font-weight: 600; - font-size: 16px; - line-height: 24px; - /* identical to box height */ - text-align: center; - color: var(--color-text-0); - } - .actionGroup{ - display: flex; - gap: 16px; - justify-content: center; - align-items: center; - flex-wrap: wrap; - } - .Community{ +.homePage{ + background-image:var(--image-bg); + background-repeat: repeat; + height: 100vh; + .heroContainer{ display: flex; - gap: 24px; + padding-top: 60px; + gap: 32px; flex-direction: column; - padding-top: 32px; - h6{ + align-items: center; + hr{ + width: 100%; + max-width: 720px; + } + .title{ + font-family: 'Poppins'; + font-style: normal; + font-weight: 800; + font-size: 60px; + line-height: 84px; text-align: center; - margin: 0; - font-weight: 400; - color: var(--color-text-1); + color: var(--color-text-0); + span{ + background: linear-gradient(94.99deg, #EF708E -2.92%, #C9357C 34.95%, #5200FF 81.04%), #010E29; + -webkit-background-clip: text; + -webkit-text-fill-color: transparent; + background-clip: text; + text-fill-color: transparent; + } + @include media{ + font-size: 48px; + } } - .CommunityGroup{ - display: flex; - gap: 24px; - max-width: 720px; - flex-direction: row; - flex-wrap: wrap; - justify-content: center; - .communityItem{ - padding: 12px; + .subtitle{ + font-family: 'Poppins'; + font-style: normal; + font-weight: 600; + 
font-size: 16px; + line-height: 24px; + /* identical to box height */ + text-align: center; + color: var(--color-text-0); + } + .actionGroup{ display: flex; - gap: 8px; + gap: 16px; + justify-content: center; align-items: center; - background-color: var(--color-fill-0); - border-radius: 6px; - border: 1px solid var(--color-border); - min-width: 340px; - width: 100%; + flex-wrap: wrap; + } + .Community{ + display: flex; + gap: 24px; + flex-direction: column; + padding-top: 32px; h6{ + text-align: center; margin: 0; - color:var(--color-text-0); - font-weight: 700; + font-weight: 400; + color: var(--color-text-1); } - .Icon{ - padding: 6px; + .CommunityGroup{ + display: flex; + gap: 24px; + max-width: 720px; + flex-direction: row; + flex-wrap: wrap; + justify-content: center; + .communityItem{ + padding: 12px; display: flex; - color:var(--color-text-0) - } - &:hover{ - background-color: var(--color-primary-light-default); - border-color: var(--color-primary); + gap: 8px; + align-items: center; + background-color: var(--color-fill-0); + border-radius: 6px; + border: 1px solid var(--color-border); + min-width: 340px; + width: 100%; + h6{ + margin: 0; + color:var(--color-text-0); + font-weight: 700; + } .Icon{ - color: var(--color-primary)!important; + padding: 6px; + display: flex; + color:var(--color-text-0) } - h6{ - text-decoration: none!important; - color: var(--color-primary); + &:hover{ + background-color: var(--color-primary-light-default); + border-color: var(--color-primary); + .Icon{ + color: var(--color-primary)!important; + } + h6{ + text-decoration: none!important; + color: var(--color-primary); + } + } + .tag{ + font-size: 14px; + color: var(--color-text-1); + background-color: var(--color-fill-1); + padding: 0 4px; + border-radius: 3px; } } - .tag{ - font-size: 14px; - color: var(--color-text-1); - background-color: var(--color-fill-1); - padding: 0 4px; - border-radius: 3px; } } - } - } - .cloudBanner{ - padding: 20px; - display: flex; - flex-direction: column; - gap: 20px; - background: linear-gradient(94.99deg, #EF708E -2.92%, #C9357C 34.95%, #5200FF 81.04%), #A7B2E1; - color: white; - border-radius: 12px; - border: 0.5px solid var(--color-border); - width: 100%; - max-width: 720px; - align-items: center; - h5{ - font-size: 16px; + .cloudBanner{ + padding: 20px; + display: flex; + flex-direction: column; + gap: 20px; + background: linear-gradient(94.99deg, #EF708E -2.92%, #C9357C 34.95%, #5200FF 81.04%), #A7B2E1; color: white; - margin-bottom: 4px; + border-radius: 12px; + border: 0.5px solid var(--color-border); + width: 100%; + max-width: 720px; + align-items: center; + h5{ + font-size: 16px; + color: white; + margin-bottom: 4px; + } } + @include media{ + padding: 20px; + } + } - @include media{ - padding: 20px; - } - -} -.Button{ - display: flex; - padding: 12px 16px; - gap: 4px; - color:var(--color-text-0); - background-color: var(--color-fill-0); - border: 1px solid var(--color-border); - &:hover{ + .Button{ + display: flex; + padding: 12px 16px; + gap: 4px; color:var(--color-text-0); - background-color: var(--color-fill-1); - } - &.Primary{ - background-color: var(--color-primary); - color: white; + background-color: var(--color-fill-0); + border: 1px solid var(--color-border); &:hover{ - background-color: var(--color-primary-hover); + color:var(--color-text-0); + background-color: var(--color-fill-1); + } + &.Primary{ + background-color: var(--color-primary); + color: white; + &:hover{ + background-color: var(--color-primary-hover); + } + } + &.White{ + color: white; + 
border: 1px solid var(--color-border); + } + @include media{ + width: 100%; + justify-content: center; } } - &.White{ - color: white; - border: 1px solid var(--color-border); - } - @include media{ - width: 100%; - justify-content: center; - } -} \ No newline at end of file +} diff --git a/website/src/pages/index.tsx b/website/src/pages/index.tsx index bf7813004daad..2c92dbe59d130 100644 --- a/website/src/pages/index.tsx +++ b/website/src/pages/index.tsx @@ -36,7 +36,7 @@ function HomepageHeader() { const { Github,Getstart,Book } = icons return ( - <> +
@@ -81,7 +81,7 @@ function HomepageHeader() {
- + ); }