diff --git a/.github/workflows/kvrocks.yaml b/.github/workflows/kvrocks.yaml index 84f1a1ab6da..45d1916aeed 100644 --- a/.github/workflows/kvrocks.yaml +++ b/.github/workflows/kvrocks.yaml @@ -39,8 +39,8 @@ jobs: outputs: docs_only: ${{ steps.result.outputs.docs_only }} steps: - - uses: actions/checkout@v3 - - uses: dorny/paths-filter@v2 + - uses: actions/checkout@v4 + - uses: dorny/paths-filter@v3.0.0 id: changes with: filters: .github/config/changes.yml @@ -56,9 +56,9 @@ jobs: env: FORCE_COLOR: 1 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install typos - run: curl -LsSf https://github.com/crate-ci/typos/releases/download/v1.16.3/typos-v1.16.3-x86_64-unknown-linux-musl.tar.gz | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin + run: curl -LsSf https://github.com/crate-ci/typos/releases/download/v1.18.2/typos-v1.18.2-x86_64-unknown-linux-musl.tar.gz | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin - name: Run typos check run: typos --config .github/config/typos.toml @@ -68,15 +68,16 @@ jobs: if: ${{ needs.precondition.outputs.docs_only != 'true' }} runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 - - uses: actions/setup-go@v3 + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 with: go-version-file: 'tests/gocase/go.mod' + cache: false - name: Prepare Dependencies run: | sudo apt update sudo apt install -y clang-format-14 clang-tidy-14 - - uses: apache/skywalking-eyes/header@v0.4.0 + - uses: apache/skywalking-eyes/header@v0.5.0 with: config: .github/config/licenserc.yml - name: Check with clang-format @@ -96,7 +97,7 @@ jobs: git diff -p > clang-format.patch cat clang-format.patch - name: Upload format patch - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: always() && steps.check-format.outcome != 'success' with: path: clang-format.patch @@ -214,7 +215,7 @@ jobs: - name: Cache redis id: cache-redis - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: | ~/local/bin/redis-cli @@ -227,16 +228,17 @@ jobs: mkdir -p $HOME/local/bin pushd redis-6.2.7 && BUILD_TLS=yes make -j$NPROC redis-cli && mv src/redis-cli $HOME/local/bin/ && popd - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: 3.x - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: go-version-file: 'tests/gocase/go.mod' - + cache: false + - name: Install gcovr 5.0 run: pip install gcovr==5.0 # 5.1 is not supported if: ${{ matrix.sonarcloud }} @@ -308,7 +310,7 @@ jobs: exit 1 fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: ${{ failure() && startsWith(matrix.os, 'ubuntu') }} with: name: kvrocks-coredumps-${{ matrix.name }} @@ -330,7 +332,7 @@ jobs: - name: Upload SonarCloud data if: ${{ matrix.sonarcloud }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: sonarcloud-data path: ${{ env.SONARCLOUD_OUTPUT_DIR }} @@ -341,10 +343,10 @@ jobs: if: ${{ needs.precondition.outputs.docs_only != 'true' }} runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Get core numbers run: echo "NPROC=$(nproc)" >> $GITHUB_ENV - - uses: docker/build-push-action@v3 + - uses: docker/build-push-action@v5 with: context: . 
build-args: MORE_BUILD_ARGS=-j${{ env.NPROC }} @@ -438,11 +440,12 @@ jobs: wget https://github.com/Kitware/CMake/releases/download/v3.26.4/cmake-3.26.4-linux-x86_64.sh bash cmake-3.26.4-linux-x86_64.sh --skip-license --prefix=/usr - - uses: actions/checkout@v3 - - uses: actions/setup-go@v3 + - uses: actions/checkout@v3 #v4 use Node 20 and not working at CentOS 7 + - uses: actions/setup-go@v4 #v5 use Node 20 too if: ${{ !startsWith(matrix.image, 'opensuse') }} with: go-version-file: 'tests/gocase/go.mod' + cache: false - name: Build Kvrocks run: | diff --git a/.github/workflows/nightly.yaml b/.github/workflows/nightly.yaml index 6ea6e7d3a3a..736b603e017 100644 --- a/.github/workflows/nightly.yaml +++ b/.github/workflows/nightly.yaml @@ -30,20 +30,20 @@ jobs: if: github.repository_owner == 'apache' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Login Docker Hub if: (github.event_name != 'pull_request') - uses: docker/login-action@v1 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Available platforms run: echo ${{ steps.buildx.outputs.platforms }} @@ -53,7 +53,7 @@ jobs: - name: Docker meta id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: apache/kvrocks flavor: latest=false @@ -61,7 +61,7 @@ jobs: type=sha,prefix=nightly-{{date 'YYYYMMDD'}}-,format=short type=raw,value=nightly - - uses: docker/build-push-action@v3 + - uses: docker/build-push-action@v5 with: context: . platforms: linux/amd64, linux/arm64 diff --git a/.github/workflows/sonar.yaml b/.github/workflows/sonar.yaml index 36a2d2a0f39..a106823cd6f 100644 --- a/.github/workflows/sonar.yaml +++ b/.github/workflows/sonar.yaml @@ -28,7 +28,7 @@ jobs: runs-on: ubuntu-22.04 if: github.event.workflow_run.conclusion == 'success' && github.repository_owner == 'apache' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: repository: ${{ github.event.workflow_run.head_repository.full_name }} ref: ${{ github.event.workflow_run.head_sha }} @@ -36,7 +36,7 @@ jobs: - name: Install sonar-scanner and build-wrapper uses: SonarSource/sonarcloud-github-c-cpp@v2 - name: 'Download code coverage' - uses: actions/github-script@v6 + uses: actions/github-script@v7 with: script: | let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({ @@ -60,7 +60,7 @@ jobs: unzip sonarcloud-data.zip -d sonarcloud-data ls -a sonarcloud-data - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: 3.x - name: Configure Kvrocks diff --git a/CMakeLists.txt b/CMakeLists.txt index ea4d1bec892..3e384e03665 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,6 +138,7 @@ include(cmake/fmt.cmake) include(cmake/jsoncons.cmake) include(cmake/xxhash.cmake) include(cmake/span.cmake) +include(cmake/trie.cmake) if (ENABLE_LUAJIT) include(cmake/luajit.cmake) @@ -169,6 +170,7 @@ list(APPEND EXTERNAL_LIBS Threads::Threads) list(APPEND EXTERNAL_LIBS ${Backtrace_LIBRARY}) list(APPEND EXTERNAL_LIBS xxhash) list(APPEND EXTERNAL_LIBS span-lite) +list(APPEND EXTERNAL_LIBS tsl_hat_trie) # Add git sha to version.h find_package(Git REQUIRED) diff --git a/NOTICE b/NOTICE index c3ab6e56d30..a25a60d1e70 100644 --- a/NOTICE +++ b/NOTICE @@ -65,6 +65,7 @@ The text of each license is 
also included in licenses/LICENSE-[project].txt * fmt(https://github.com/fmtlib/fmt) * LuaJIT(https://github.com/KvrocksLabs/LuaJIT) * lua(https://github.com/KvrocksLabs/lua, alternative to LuaJIT) +* hat-trie(https://github.com/Tessil/hat-trie) ================================================================ Boost Software License Version 1.0 diff --git a/README.md b/README.md index bd0a6798996..ffe4614862d 100644 --- a/README.md +++ b/README.md @@ -225,6 +225,7 @@ Apache Kvrocks is licensed under the Apache License Version 2.0. See the [LICENS ## Social Media - [Medium](https://kvrocks.medium.com/) +- [X (Twitter)](https://twitter.com/apache_kvrocks) - [Zhihu](https://www.zhihu.com/people/kvrocks) (in Chinese) - WeChat Official Account (in Chinese, scan the QR code to follow) diff --git a/cmake/fmt.cmake b/cmake/fmt.cmake index 7aa22ffd94d..981272e47b3 100644 --- a/cmake/fmt.cmake +++ b/cmake/fmt.cmake @@ -20,8 +20,8 @@ include_guard() include(cmake/utils.cmake) FetchContent_DeclareGitHubWithMirror(fmt - fmtlib/fmt 10.1.1 - MD5=2a91a7d74be8bfd3a19e7e2abbc7c034 + fmtlib/fmt 10.2.1 + MD5=1bba4e8bdd7b0fa98f207559ffa380a3 ) FetchContent_MakeAvailableWithArgs(fmt) diff --git a/cmake/jsoncons.cmake b/cmake/jsoncons.cmake index 8092745d56a..a81bd8388a0 100644 --- a/cmake/jsoncons.cmake +++ b/cmake/jsoncons.cmake @@ -20,8 +20,8 @@ include_guard() include(cmake/utils.cmake) FetchContent_DeclareGitHubWithMirror(jsoncons - danielaparker/jsoncons v0.172.0 - MD5=2bc70a1ddc8c5fc96d43c0cb4d10dfa0 + danielaparker/jsoncons v0.173.4 + MD5=947254529a8629d001322a78454a23d2 ) FetchContent_MakeAvailableWithArgs(jsoncons diff --git a/cmake/rocksdb.cmake b/cmake/rocksdb.cmake index bd05eb35323..d7c71532b37 100644 --- a/cmake/rocksdb.cmake +++ b/cmake/rocksdb.cmake @@ -26,8 +26,8 @@ endif() include(cmake/utils.cmake) FetchContent_DeclareGitHubWithMirror(rocksdb - facebook/rocksdb v8.10.0 - MD5=ed06e98fae30c29cceacbfd45a316f06 + facebook/rocksdb v8.10.2 + MD5=2155ffb638bfcf42b31818b00d9a3005 ) FetchContent_GetProperties(jemalloc) diff --git a/cmake/span.cmake b/cmake/span.cmake index 138af0b1642..0fe733dc603 100644 --- a/cmake/span.cmake +++ b/cmake/span.cmake @@ -20,8 +20,8 @@ include_guard() include(cmake/utils.cmake) FetchContent_DeclareGitHubWithMirror(span - martinmoene/span-lite v0.10.3 - MD5=ee5c6721d4f4f56a6e6f250c68ad4132 + martinmoene/span-lite v0.11.0 + MD5=9786933d03cda33ac7dccbfe85f771dd ) FetchContent_MakeAvailableWithArgs(span) diff --git a/cmake/speedb.cmake b/cmake/speedb.cmake index fccd1ecdb9a..a0bc350ece4 100644 --- a/cmake/speedb.cmake +++ b/cmake/speedb.cmake @@ -26,8 +26,8 @@ endif() include(cmake/utils.cmake) FetchContent_DeclareGitHubWithMirror(speedb - speedb-io/speedb speedb/v2.7.0 - MD5=9603a0921deb4e3cd9046cf7e9288485 + speedb-io/speedb speedb/v2.8.0 + MD5=3da818408057c8c818bfc9adc40d929f ) FetchContent_GetProperties(jemalloc) diff --git a/cmake/trie.cmake b/cmake/trie.cmake new file mode 100644 index 00000000000..30d63429c25 --- /dev/null +++ b/cmake/trie.cmake @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +include_guard() + +include(cmake/utils.cmake) + +FetchContent_DeclareGitHubWithMirror(trie + Tessil/hat-trie 906e6abd1e7063f1dacd3a6b270aa654b525eb0a + MD5=a930364e9f6b60371319664bddf78000 +) + +FetchContent_MakeAvailableWithArgs(trie) diff --git a/kvrocks.conf b/kvrocks.conf index defd99854bc..e6b17dae2ef 100644 --- a/kvrocks.conf +++ b/kvrocks.conf @@ -309,7 +309,7 @@ max-bitmap-to-string-mb 16 # If enabled, the cursor will be unsigned 64-bit integers. # If disabled, the cursor will be a string. # Default: no -redis-cursor-compatible no +redis-cursor-compatible yes # Whether to enable the RESP3 protocol. # NOTICE: RESP3 is still under development, don't enable it in production environment. @@ -536,6 +536,16 @@ compaction-checker-range 0-7 # rename-command KEYS "" ################################ MIGRATE ##################################### +# Slot migration supports two types: +# - redis-command: Migrate data via the Redis serialization protocol (RESP). +# - raw-key-value: Migrate the raw key-value data of the storage engine directly. +# This type eliminates the overhead of converting to Redis +# commands, reduces resource consumption, improves migration +# efficiency, and supports a finer-grained rate limit. +# +# Default: redis-command +migrate-type redis-command + # If the network bandwidth is completely consumed by the migration task, # it will affect the availability of kvrocks. To avoid this situation, # migrate-speed is adopted to limit the migrating speed. @@ -562,6 +572,18 @@ migrate-pipeline-size 16 # Default: 10000 migrate-sequence-gap 10000 +# The raw-key-value migration type sends data in batches. This option sets the +# size of each batch. +# +# Default: 16kb +migrate-batch-size-kb 16 + +# Rate limit for raw-key-value migration, i.e. the maximum amount of data +# that can be migrated per second. 0 means no limit. +# +# Default: 16M +migrate-batch-rate-limit-mb 16 + ################################ ROCKSDB ##################################### # Specify the capacity of column family block cache. A larger block cache @@ -700,8 +722,7 @@ rocksdb.block_size 16384 # Default: yes rocksdb.cache_index_and_filter_blocks yes -# Specify the compression to use. Only compress level greater -# than 2 to improve performance. +# Specify the compression to use. # Accept value: "no", "snappy", "lz4", "zstd", "zlib" # default snappy rocksdb.compression snappy @@ -837,7 +858,7 @@ rocksdb.read_options.async_io no # buffer cache before the write is considered complete. # If this flag is enabled, writes will be slower. # If this flag is disabled, and the machine crashes, some recent -# rites may be lost. Note that if it is just the process that +# writes may be lost. Note that if it is just the process that # crashes (i.e., the machine does not reboot), no writes will be # lost even if sync==false.
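[Editor's note] The MIGRATE section above introduces the raw-key-value migration type together with migrate-batch-size-kb and migrate-batch-rate-limit-mb. As a minimal sketch of how a megabyte-valued limit like this can be enforced, the snippet below feeds chunked requests through RocksDB's generic rate limiter, mirroring the pattern BatchSender::Send() uses later in this patch; ThrottledSend and the MiB constant are illustrative stand-ins, not code from the patch.

```cpp
#include <algorithm>
#include <memory>

#include <rocksdb/rate_limiter.h>

constexpr size_t MiB = 1024 * 1024;  // assumed to match kvrocks' size constants

// Sketch: charge `payload_bytes` against a limiter built from a
// migrate-batch-rate-limit-mb-style setting, blocking once the budget is spent.
void ThrottledSend(size_t payload_bytes, int rate_limit_mb) {
  if (rate_limit_mb == 0) return;  // 0 means no limit, per the config comment
  std::unique_ptr<rocksdb::RateLimiter> limiter(
      rocksdb::NewGenericRateLimiter(static_cast<int64_t>(rate_limit_mb * MiB)));
  // A single Request() may not exceed the limiter's burst size, so a large
  // payload is split into burst-sized chunks.
  auto left = static_cast<int64_t>(payload_bytes);
  while (left > 0) {
    auto chunk = std::min(left, limiter->GetSingleBurstBytes());
    limiter->Request(chunk, rocksdb::Env::IOPriority::IO_HIGH, nullptr);
    left -= chunk;
  }
}
```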
# diff --git a/licenses/LICENSE-hat-trie.txt b/licenses/LICENSE-hat-trie.txt new file mode 100644 index 00000000000..e9c5ae95f36 --- /dev/null +++ b/licenses/LICENSE-hat-trie.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Thibaut Goetghebuer-Planchon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/cluster/batch_sender.cc b/src/cluster/batch_sender.cc new file mode 100644 index 00000000000..e92221ee6f3 --- /dev/null +++ b/src/cluster/batch_sender.cc @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "batch_sender.h" + +#include "io_util.h" +#include "server/redis_reply.h" +#include "time_util.h" + +Status BatchSender::Put(rocksdb::ColumnFamilyHandle *cf, const rocksdb::Slice &key, const rocksdb::Slice &value) { + // If the data is too large to fit in one batch, it needs to be split into multiple batches. + // To cover this case, we append the log data when first adding metadata.
+ if (pending_entries_ == 0 && !prefix_logdata_.empty()) { + auto s = PutLogData(prefix_logdata_); + if (!s.IsOK()) { + return s; + } + } + auto s = write_batch_.Put(cf, key, value); + if (!s.ok()) { + return {Status::NotOK, fmt::format("failed to put key value to migration batch, {}", s.ToString())}; + } + + pending_entries_++; + entries_num_++; + return Status::OK(); +} + +Status BatchSender::Delete(rocksdb::ColumnFamilyHandle *cf, const rocksdb::Slice &key) { + auto s = write_batch_.Delete(cf, key); + if (!s.ok()) { + return {Status::NotOK, fmt::format("failed to delete key from migration batch, {}", s.ToString())}; + } + pending_entries_++; + entries_num_++; + return Status::OK(); +} + +Status BatchSender::PutLogData(const rocksdb::Slice &blob) { + auto s = write_batch_.PutLogData(blob); + if (!s.ok()) { + return {Status::NotOK, fmt::format("failed to put log data to migration batch, {}", s.ToString())}; + } + pending_entries_++; + entries_num_++; + return Status::OK(); +} + +void BatchSender::SetPrefixLogData(const std::string &prefix_logdata) { prefix_logdata_ = prefix_logdata; } + +Status BatchSender::Send() { + if (pending_entries_ == 0) { + return Status::OK(); + } + + // rate limit + if (bytes_per_sec_ > 0) { + auto single_burst = rate_limiter_->GetSingleBurstBytes(); + auto left = static_cast<int64_t>(write_batch_.GetDataSize()); + while (left > 0) { + auto request_size = std::min(left, single_burst); + rate_limiter_->Request(request_size, rocksdb::Env::IOPriority::IO_HIGH, nullptr); + left -= request_size; + } + } + + auto s = sendApplyBatchCmd(dst_fd_, write_batch_); + if (!s.IsOK()) { + return s.Prefixed("failed to send APPLYBATCH command"); + } + + sent_bytes_ += write_batch_.GetDataSize(); + sent_batches_num_++; + pending_entries_ = 0; + write_batch_.Clear(); + return Status::OK(); +} + +Status BatchSender::sendApplyBatchCmd(int fd, const rocksdb::WriteBatch &write_batch) { + if (fd <= 0) { + return {Status::NotOK, "invalid fd"}; + } + + GET_OR_RET(util::SockSend(fd, redis::ArrayOfBulkStrings({"APPLYBATCH", write_batch.Data()}))); + + std::string line = GET_OR_RET(util::SockReadLine(fd)); + + if (line.compare(0, 1, "-") == 0) { + return {Status::NotOK, line}; + } + + return Status::OK(); +} + +void BatchSender::SetBytesPerSecond(size_t bytes_per_sec) { + if (bytes_per_sec_ == bytes_per_sec) { + return; + } + bytes_per_sec_ = bytes_per_sec; + if (bytes_per_sec > 0) { + rate_limiter_->SetBytesPerSecond(static_cast<int64_t>(bytes_per_sec)); + } +} + +double BatchSender::GetRate(uint64_t since) const { + auto t = util::GetTimeStampMS(); + if (t <= since) { + return 0; + } + + return ((static_cast<double>(sent_bytes_) / 1024.0) / (static_cast<double>(t - since) / 1000.0)); +} diff --git a/src/cluster/batch_sender.h b/src/cluster/batch_sender.h new file mode 100644 index 00000000000..41f46d1f2f7 --- /dev/null +++ b/src/cluster/batch_sender.h @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include <rocksdb/rate_limiter.h> +#include <rocksdb/write_batch.h> + +#include "status.h" + +class BatchSender { + public: + BatchSender() = default; + BatchSender(int fd, size_t max_bytes, size_t bytes_per_sec) + : dst_fd_(fd), + max_bytes_(max_bytes), + bytes_per_sec_(bytes_per_sec), + rate_limiter_(std::unique_ptr<rocksdb::RateLimiter>( + rocksdb::NewGenericRateLimiter(static_cast<int64_t>(bytes_per_sec_)))) {} + + ~BatchSender() = default; + + Status Put(rocksdb::ColumnFamilyHandle *cf, const rocksdb::Slice &key, const rocksdb::Slice &value); + Status Delete(rocksdb::ColumnFamilyHandle *cf, const rocksdb::Slice &key); + Status PutLogData(const rocksdb::Slice &blob); + void SetPrefixLogData(const std::string &prefix_logdata); + Status Send(); + + void SetMaxBytes(size_t max_bytes) { + if (max_bytes_ != max_bytes) max_bytes_ = max_bytes; + } + bool IsFull() const { return write_batch_.GetDataSize() >= max_bytes_; } + uint64_t GetSentBytes() const { return sent_bytes_; } + uint32_t GetSentBatchesNum() const { return sent_batches_num_; } + uint32_t GetEntriesNum() const { return entries_num_; } + void SetBytesPerSecond(size_t bytes_per_sec); + double GetRate(uint64_t since) const; + + private: + static Status sendApplyBatchCmd(int fd, const rocksdb::WriteBatch &write_batch); + + rocksdb::WriteBatch write_batch_{}; + std::string prefix_logdata_{}; + uint64_t sent_bytes_ = 0; + uint32_t sent_batches_num_ = 0; + uint32_t entries_num_ = 0; + uint32_t pending_entries_ = 0; + + int dst_fd_; + size_t max_bytes_; + + size_t bytes_per_sec_ = 0; // 0 means no limit + std::unique_ptr<rocksdb::RateLimiter> rate_limiter_; +}; diff --git a/src/cluster/slot_migrate.cc b/src/cluster/slot_migrate.cc index be54cc2e9e2..eba2f901330 100644 --- a/src/cluster/slot_migrate.cc +++ b/src/cluster/slot_migrate.cc @@ -28,6 +28,7 @@ #include "fmt/format.h" #include "io_util.h" #include "storage/batch_extractor.h" +#include "storage/iterator.h" #include "sync_migrate_context.h" #include "thread_util.h" #include "time_util.h" @@ -36,14 +37,21 @@ const char *errFailedToSendCommands = "failed to send commands to restore a key"; const char *errMigrationTaskCanceled = "key migration stopped due to a task cancellation"; const char *errFailedToSetImportStatus = "failed to set import status on destination node"; +const char *errUnsupportedMigrationType = "unsupported migration type"; static std::map<RedisType, std::string> type_to_cmd = { {kRedisString, "set"}, {kRedisList, "rpush"}, {kRedisHash, "hmset"}, {kRedisSet, "sadd"}, {kRedisZSet, "zadd"}, {kRedisBitmap, "setbit"}, {kRedisSortedint, "siadd"}, {kRedisStream, "xadd"}, }; -SlotMigrator::SlotMigrator(Server *srv, int max_migration_speed, int max_pipeline_size, int seq_gap_limit) - : Database(srv->storage, kDefaultNamespace), srv_(srv) { +SlotMigrator::SlotMigrator(Server *srv) + : Database(srv->storage, kDefaultNamespace), + srv_(srv), + max_migration_speed_(srv->GetConfig()->migrate_speed), + max_pipeline_size_(srv->GetConfig()->pipeline_size), + seq_gap_limit_(srv->GetConfig()->sequence_gap), + migrate_batch_bytes_per_sec_(srv->GetConfig()->migrate_batch_rate_limit_mb * MiB), + migrate_batch_size_bytes_(srv->GetConfig()->migrate_batch_size_kb *
KiB) { // Let metadata_cf_handle_ be nullptr, and get them in real time to avoid accessing invalid pointer, // because metadata_cf_handle_ and db_ will be destroyed if DB is reopened. // [Situation]: @@ -61,16 +69,6 @@ SlotMigrator::SlotMigrator(Server *srv, int max_migration_speed, int max_pipelin // This problem may exist in all functions of Database called in slot migration process. metadata_cf_handle_ = nullptr; - if (max_migration_speed >= 0) { - max_migration_speed_ = max_migration_speed; - } - if (max_pipeline_size > 0) { - max_pipeline_size_ = max_pipeline_size; - } - if (seq_gap_limit > 0) { - seq_gap_limit_ = seq_gap_limit; - } - if (srv->IsSlave()) { SetStopMigrationFlag(true); } @@ -210,7 +208,7 @@ void SlotMigrator::runMigrationProcess() { break; } case SlotMigrationStage::kWAL: { - auto s = syncWal(); + auto s = syncWAL(); if (s.IsOK()) { LOG(INFO) << "[migrate] Succeed to sync from WAL for a slot " << migrating_slot_; current_stage_ = SlotMigrationStage::kSuccess; @@ -298,6 +296,24 @@ Status SlotMigrator::startMigration() { } Status SlotMigrator::sendSnapshot() { + if (srv_->GetConfig()->migrate_type == MigrationType::kRedisCommand) { + return sendSnapshotByCmd(); + } else if (srv_->GetConfig()->migrate_type == MigrationType::kRawKeyValue) { + return sendSnapshotByRawKV(); + } + return {Status::NotOK, errUnsupportedMigrationType}; +} + +Status SlotMigrator::syncWAL() { + if (srv_->GetConfig()->migrate_type == MigrationType::kRedisCommand) { + return syncWALByCmd(); + } else if (srv_->GetConfig()->migrate_type == MigrationType::kRawKeyValue) { + return syncWALByRawKV(); + } + return {Status::NotOK, errUnsupportedMigrationType}; +} + +Status SlotMigrator::sendSnapshotByCmd() { uint64_t migrated_key_cnt = 0; uint64_t expired_key_cnt = 0; uint64_t empty_key_cnt = 0; @@ -365,7 +381,7 @@ Status SlotMigrator::sendSnapshot() { return Status::OK(); } -Status SlotMigrator::syncWal() { +Status SlotMigrator::syncWALByCmd() { // Send incremental data from WAL circularly until new increment less than a certain amount auto s = syncWalBeforeForbiddingSlot(); if (!s.IsOK()) { @@ -1130,3 +1146,168 @@ void SlotMigrator::resumeSyncCtx(const Status &migrate_result) { blocking_context_ = nullptr; } } + +Status SlotMigrator::sendMigrationBatch(BatchSender *batch) { + // user may dynamically change some configs, apply it when send data + batch->SetMaxBytes(migrate_batch_size_bytes_); + batch->SetBytesPerSecond(migrate_batch_bytes_per_sec_); + return batch->Send(); +} + +Status SlotMigrator::sendSnapshotByRawKV() { + uint64_t start_ts = util::GetTimeStampMS(); + LOG(INFO) << "[migrate] Migrating snapshot of slot " << migrating_slot_ << " by raw key value"; + + rocksdb::ReadOptions read_options = storage_->DefaultScanOptions(); + read_options.snapshot = slot_snapshot_; + engine::DBIterator iter(storage_, read_options); + auto prefix = ComposeSlotKeyPrefix(namespace_, migrating_slot_); + + BatchSender batch_sender(*dst_fd_, migrate_batch_size_bytes_, migrate_batch_bytes_per_sec_); + + for (iter.Seek(prefix); iter.Valid() && iter.Key().starts_with(prefix); iter.Next()) { + auto redis_type = iter.Type(); + std::string log_data; + if (redis_type == RedisType::kRedisList) { + redis::WriteBatchLogData batch_log_data(redis_type, {std::to_string(RedisCommand::kRedisCmdRPush)}); + log_data = batch_log_data.Encode(); + } else { + redis::WriteBatchLogData batch_log_data(redis_type); + log_data = batch_log_data.Encode(); + } + batch_sender.SetPrefixLogData(log_data); + + 
GET_OR_RET(batch_sender.Put(storage_->GetCFHandle(engine::kMetadataColumnFamilyName), iter.Key(), iter.Value())); + + auto subkey_iter = iter.GetSubKeyIterator(); + if (!subkey_iter) { + continue; + } + + for (subkey_iter->Seek(); subkey_iter->Valid(); subkey_iter->Next()) { + GET_OR_RET(batch_sender.Put(subkey_iter->ColumnFamilyHandle(), subkey_iter->Key(), subkey_iter->Value())); + + if (redis_type == RedisType::kRedisZSet) { + InternalKey internal_key(subkey_iter->Key(), storage_->IsSlotIdEncoded()); + auto score_key = subkey_iter->Value().ToString(); + score_key.append(subkey_iter->UserKey().ToString()); + auto score_key_bytes = + InternalKey(iter.Key(), score_key, internal_key.GetVersion(), storage_->IsSlotIdEncoded()).Encode(); + GET_OR_RET(batch_sender.Put(storage_->GetCFHandle(kColumnFamilyIDZSetScore), score_key_bytes, Slice())); + } + + if (batch_sender.IsFull()) { + GET_OR_RET(sendMigrationBatch(&batch_sender)); + } + } + + if (batch_sender.IsFull()) { + GET_OR_RET(sendMigrationBatch(&batch_sender)); + } + } + + GET_OR_RET(sendMigrationBatch(&batch_sender)); + + auto elapsed = util::GetTimeStampMS() - start_ts; + LOG(INFO) << fmt::format( + "[migrate] Succeed to migrate snapshot, slot: {}, elapsed: {} ms, " + "sent: {} bytes, rate: {:.2f} kb/s, batches: {}, entries: {}", + migrating_slot_.load(), elapsed, batch_sender.GetSentBytes(), batch_sender.GetRate(start_ts), + batch_sender.GetSentBatchesNum(), batch_sender.GetEntriesNum()); + + return Status::OK(); +} + +Status SlotMigrator::syncWALByRawKV() { + uint64_t start_ts = util::GetTimeStampMS(); + LOG(INFO) << "[migrate] Syncing WAL of slot " << migrating_slot_ << " by raw key value"; + BatchSender batch_sender(*dst_fd_, migrate_batch_size_bytes_, migrate_batch_bytes_per_sec_); + + int epoch = 1; + uint64_t wal_incremental_seq = 0; + + while (epoch <= kMaxLoopTimes) { + if (catchUpIncrementalWAL()) { + break; + } + wal_incremental_seq = storage_->GetDB()->GetLatestSequenceNumber(); + auto s = migrateIncrementalDataByRawKV(wal_incremental_seq, &batch_sender); + if (!s.IsOK()) { + return {Status::NotOK, fmt::format("migrate incremental data failed, {}", s.Msg())}; + } + LOG(INFO) << fmt::format("[migrate] Migrated incremental data, epoch: {}, seq from {} to {}", epoch, wal_begin_seq_, + wal_incremental_seq); + wal_begin_seq_ = wal_incremental_seq; + epoch++; + } + + setForbiddenSlot(migrating_slot_); + + wal_incremental_seq = storage_->GetDB()->GetLatestSequenceNumber(); + if (wal_incremental_seq > wal_begin_seq_) { + auto s = migrateIncrementalDataByRawKV(wal_incremental_seq, &batch_sender); + if (!s.IsOK()) { + return {Status::NotOK, fmt::format("migrate last incremental data failed, {}", s.Msg())}; + } + LOG(INFO) << fmt::format("[migrate] Migrated last incremental data after set forbidden slot, seq from {} to {}", + wal_begin_seq_, wal_incremental_seq); + } + + auto elapsed = util::GetTimeStampMS() - start_ts; + LOG(INFO) << fmt::format( + "[migrate] Succeed to migrate incremental data, slot: {}, elapsed: {} ms, " + "sent: {} bytes, rate: {:.2f} kb/s, batches: {}, entries: {}", + migrating_slot_.load(), elapsed, batch_sender.GetSentBytes(), batch_sender.GetRate(start_ts), + batch_sender.GetSentBatchesNum(), batch_sender.GetEntriesNum()); + + return Status::OK(); +} + +bool SlotMigrator::catchUpIncrementalWAL() { + uint64_t gap = storage_->GetDB()->GetLatestSequenceNumber() - wal_begin_seq_; + if (gap <= seq_gap_limit_) { + LOG(INFO) << fmt::format("[migrate] Incremental data sequence gap: {}, less than limit: {}, set forbidden 
slot: {}", + gap, seq_gap_limit_, migrating_slot_.load()); + return true; + } + return false; +} + +Status SlotMigrator::migrateIncrementalDataByRawKV(uint64_t end_seq, BatchSender *batch_sender) { + engine::WALIterator wal_iter(storage_, migrating_slot_); + uint64_t start_seq = wal_begin_seq_ + 1; + for (wal_iter.Seek(start_seq); wal_iter.Valid(); wal_iter.Next()) { + if (wal_iter.NextSequenceNumber() > end_seq + 1) { + break; + } + auto item = wal_iter.Item(); + switch (item.type) { + case engine::WALItem::Type::kTypeLogData: { + GET_OR_RET(batch_sender->PutLogData(item.key)); + break; + } + case engine::WALItem::Type::kTypePut: { + GET_OR_RET(batch_sender->Put(storage_->GetCFHandle(static_cast<ColumnFamilyID>(item.column_family_id)), + item.key, item.value)); + break; + } + case engine::WALItem::Type::kTypeDelete: { + GET_OR_RET( + batch_sender->Delete(storage_->GetCFHandle(static_cast<ColumnFamilyID>(item.column_family_id)), item.key)); + break; + } + case engine::WALItem::Type::kTypeDeleteRange: { + // Do nothing in DeleteRange due to it might cross multiple slots. It's only used in + // FLUSHDB/FLUSHALL commands for now and maybe we can disable them while migrating. + } + default: + break; + } + if (batch_sender->IsFull()) { + GET_OR_RET(sendMigrationBatch(batch_sender)); + } + } + + // send the remaining data + return sendMigrationBatch(batch_sender); +} diff --git a/src/cluster/slot_migrate.h b/src/cluster/slot_migrate.h index 9aad1dcc5ac..8fefdbc9305 100644 --- a/src/cluster/slot_migrate.h +++ b/src/cluster/slot_migrate.h @@ -34,6 +34,7 @@ #include #include +#include "batch_sender.h" #include "config.h" #include "encoding.h" #include "parse_util.h" @@ -45,6 +46,8 @@ #include "storage/redis_db.h" #include "unique_fd.h" +enum class MigrationType { kRedisCommand = 0, kRawKeyValue }; + enum class MigrationState { kNone = 0, kStarted, kSuccess, kFailed }; enum class SlotMigrationStage { kNone, kStart, kSnapshot, kWAL, kSuccess, kFailed, kClean }; @@ -75,8 +78,7 @@ class SyncMigrateContext; class SlotMigrator : public redis::Database { public: - explicit SlotMigrator(Server *srv, int max_migration_speed = kDefaultMaxMigrationSpeed, - int max_pipeline_size = kDefaultMaxPipelineSize, int seq_gap_limit = kDefaultSequenceGapLimit); + explicit SlotMigrator(Server *srv); SlotMigrator(const SlotMigrator &other) = delete; SlotMigrator &operator=(const SlotMigrator &other) = delete; ~SlotMigrator(); @@ -94,6 +96,8 @@ class SlotMigrator : public redis::Database { void SetSequenceGapLimit(int value) { if (value > 0) seq_gap_limit_ = value; } + void SetMigrateBatchRateLimit(size_t bytes_per_sec) { migrate_batch_bytes_per_sec_ = bytes_per_sec; } + void SetMigrateBatchSize(size_t size) { migrate_batch_size_bytes_ = size; } void SetStopMigrationFlag(bool value) { stop_migration_ = value; } bool IsMigrationInProgress() const { return migration_state_ == MigrationState::kStarted; } SlotMigrationStage GetCurrentSlotMigrationStage() const { return current_stage_; } @@ -108,13 +112,16 @@ class SlotMigrator : public redis::Database { bool isTerminated() { return thread_state_ == ThreadState::Terminated; } Status startMigration(); Status sendSnapshot(); - Status syncWal(); + Status syncWAL(); Status finishSuccessfulMigration(); Status finishFailedMigration(); void clean(); Status authOnDstNode(int sock_fd, const std::string &password); Status setImportStatusOnDstNode(int sock_fd, int status); + + Status sendSnapshotByCmd(); + Status syncWALByCmd(); Status checkSingleResponse(int sock_fd); Status checkMultipleResponses(int sock_fd, int
total); @@ -133,6 +140,13 @@ class SlotMigrator : public redis::Database { Status migrateIncrementData(std::unique_ptr<rocksdb::TransactionLogIterator> *iter, uint64_t end_seq); Status syncWalBeforeForbiddingSlot(); Status syncWalAfterForbiddingSlot(); + + Status sendMigrationBatch(BatchSender *batch); + Status sendSnapshotByRawKV(); + Status syncWALByRawKV(); + bool catchUpIncrementalWAL(); + Status migrateIncrementalDataByRawKV(uint64_t end_seq, BatchSender *batch_sender); + void setForbiddenSlot(int16_t slot); std::unique_lock<std::mutex> blockingLock() { return std::unique_lock<std::mutex>(blocking_mutex_); } @@ -148,9 +162,12 @@ class SlotMigrator : public redis::Database { static const int kMaxLoopTimes = 10; Server *srv_; - int max_migration_speed_; - int max_pipeline_size_; - int seq_gap_limit_; + + int max_migration_speed_ = kDefaultMaxMigrationSpeed; + int max_pipeline_size_ = kDefaultMaxPipelineSize; + uint64_t seq_gap_limit_ = kDefaultSequenceGapLimit; + std::atomic<size_t> migrate_batch_bytes_per_sec_ = 1 * GiB; + std::atomic<size_t> migrate_batch_size_bytes_; SlotMigrationStage current_stage_ = SlotMigrationStage::kNone; ParserState parser_state_ = ParserState::ArrayLen; diff --git a/src/commands/cmd_bit.cc b/src/commands/cmd_bit.cc index 65e90d046d1..088e0add853 100644 --- a/src/commands/cmd_bit.cc +++ b/src/commands/cmd_bit.cc @@ -120,8 +120,9 @@ class CommandBitCount : public Commander { if (args.size() == 5) { if (util::EqualICase(args[4], "BYTE")) { + is_bit_index_ = false; } else if (util::EqualICase(args[4], "BIT")) { - return {Status::RedisExecErr, errNotImplemented}; + is_bit_index_ = true; } else { return {Status::RedisParseErr, errInvalidSyntax}; } @@ -133,7 +134,7 @@ class CommandBitCount : public Commander { Status Execute(Server *srv, Connection *conn, std::string *output) override { uint32_t cnt = 0; redis::Bitmap bitmap_db(srv->storage, conn->GetNamespace()); - auto s = bitmap_db.BitCount(args_[1], start_, stop_, &cnt); + auto s = bitmap_db.BitCount(args_[1], start_, stop_, is_bit_index_, &cnt); if (!s.ok()) return {Status::RedisExecErr, s.ToString()}; *output = redis::Integer(cnt); @@ -143,6 +144,7 @@ class CommandBitCount : public Commander { private: int64_t start_ = 0; int64_t stop_ = -1; + bool is_bit_index_ = false; }; class CommandBitPos : public Commander { diff --git a/src/commands/cmd_cluster.cc b/src/commands/cmd_cluster.cc index 4554a3270cf..96ecfbde28b 100644 --- a/src/commands/cmd_cluster.cc +++ b/src/commands/cmd_cluster.cc @@ -85,7 +85,7 @@ class CommandCluster : public Commander { std::string nodes_desc; Status s = srv->cluster->GetClusterNodes(&nodes_desc); if (s.IsOK()) { - *output = redis::BulkString(nodes_desc); + *output = conn->VerbatimString("txt", nodes_desc); } else { return {Status::RedisExecErr, s.Msg()}; } @@ -93,7 +93,7 @@ class CommandCluster : public Commander { std::string cluster_info; Status s = srv->cluster->GetClusterInfo(&cluster_info); if (s.IsOK()) { - *output = conn->VerbatimString("txt", cluster_info); + *output = conn->VerbatimString("txt", cluster_info); } else { return {Status::RedisExecErr, s.Msg()}; } diff --git a/src/commands/cmd_hash.cc b/src/commands/cmd_hash.cc index cc4c475ebad..6db97f89025 100644 --- a/src/commands/cmd_hash.cc +++ b/src/commands/cmd_hash.cc @@ -439,7 +439,7 @@ REDIS_REGISTER_COMMANDS(MakeCmdAttr<CommandHGet>("hget", 3, "read-only", 1, 1, 1 MakeCmdAttr<CommandHIncrByFloat>("hincrbyfloat", 4, "write", 1, 1, 1), MakeCmdAttr<CommandHSet>("hset", -4, "write", 1, 1, 1), MakeCmdAttr<CommandHSetNX>("hsetnx", -4, "write", 1, 1, 1), - MakeCmdAttr<CommandHDel>("hdel", -3, "write", 1, 1, 1), + MakeCmdAttr<CommandHDel>("hdel", -3, "write no-dbsize-check", 1, 1, 1),
MakeCmdAttr("hstrlen", 3, "read-only", 1, 1, 1), MakeCmdAttr("hexists", 3, "read-only", 1, 1, 1), MakeCmdAttr("hlen", 2, "read-only", 1, 1, 1), diff --git a/src/commands/cmd_json.cc b/src/commands/cmd_json.cc index 8cd49c51e3f..54a28271eae 100644 --- a/src/commands/cmd_json.cc +++ b/src/commands/cmd_json.cc @@ -600,15 +600,15 @@ REDIS_REGISTER_COMMANDS(MakeCmdAttr("json.set", 4, "write", 1, 1 MakeCmdAttr("json.type", -2, "read-only", 1, 1, 1), MakeCmdAttr("json.arrappend", -4, "write", 1, 1, 1), MakeCmdAttr("json.arrinsert", -5, "write", 1, 1, 1), - MakeCmdAttr("json.arrtrim", 5, "write", 1, 1, 1), - MakeCmdAttr("json.clear", -2, "write", 1, 1, 1), + MakeCmdAttr("json.arrtrim", 5, "write no-dbsize-check", 1, 1, 1), + MakeCmdAttr("json.clear", -2, "write no-dbsize-check", 1, 1, 1), MakeCmdAttr("json.toggle", -2, "write", 1, 1, 1), MakeCmdAttr("json.arrlen", -2, "read-only", 1, 1, 1), MakeCmdAttr("json.merge", 4, "write", 1, 1, 1), MakeCmdAttr("json.objkeys", -2, "read-only", 1, 1, 1), MakeCmdAttr("json.arrpop", -2, "write", 1, 1, 1), MakeCmdAttr("json.arrindex", -4, "read-only", 1, 1, 1), - MakeCmdAttr("json.del", -2, "write", 1, 1, 1), + MakeCmdAttr("json.del", -2, "write no-dbsize-check", 1, 1, 1), // JSON.FORGET is an alias for JSON.DEL, refer: https://redis.io/commands/json.forget/ MakeCmdAttr("json.forget", -2, "write", 1, 1, 1), MakeCmdAttr("json.numincrby", 4, "write", 1, 1, 1), diff --git a/src/commands/cmd_key.cc b/src/commands/cmd_key.cc index f94f87fecf5..2eacdd1ef74 100644 --- a/src/commands/cmd_key.cc +++ b/src/commands/cmd_key.cc @@ -350,8 +350,8 @@ REDIS_REGISTER_COMMANDS(MakeCmdAttr("ttl", 2, "read-only", 1, 1, 1), MakeCmdAttr("pexpireat", 3, "write", 1, 1, 1), MakeCmdAttr("expiretime", 2, "read-only", 1, 1, 1), MakeCmdAttr("pexpiretime", 2, "read-only", 1, 1, 1), - MakeCmdAttr("del", -2, "write", 1, -1, 1), - MakeCmdAttr("unlink", -2, "write", 1, -1, 1), + MakeCmdAttr("del", -2, "write no-dbsize-check", 1, -1, 1), + MakeCmdAttr("unlink", -2, "write no-dbsize-check", 1, -1, 1), MakeCmdAttr("rename", 3, "write", 1, 2, 1), MakeCmdAttr("renamenx", 3, "write", 1, 2, 1), ) diff --git a/src/commands/cmd_list.cc b/src/commands/cmd_list.cc index 726d2a70889..f354d64cc4b 100644 --- a/src/commands/cmd_list.cc +++ b/src/commands/cmd_list.cc @@ -861,9 +861,9 @@ REDIS_REGISTER_COMMANDS(MakeCmdAttr("blpop", -3, "write no-script" MakeCmdAttr("lpush", -3, "write", 1, 1, 1), MakeCmdAttr("lpushx", -3, "write", 1, 1, 1), MakeCmdAttr("lrange", 4, "read-only", 1, 1, 1), - MakeCmdAttr("lrem", 4, "write", 1, 1, 1), + MakeCmdAttr("lrem", 4, "write no-dbsize-check", 1, 1, 1), MakeCmdAttr("lset", 4, "write", 1, 1, 1), - MakeCmdAttr("ltrim", 4, "write", 1, 1, 1), + MakeCmdAttr("ltrim", 4, "write no-dbsize-check", 1, 1, 1), MakeCmdAttr("lmpop", -4, "write", CommandLMPop::keyRangeGen), MakeCmdAttr("rpop", -2, "write", 1, 1, 1), MakeCmdAttr("rpoplpush", 3, "write", 1, 2, 1), diff --git a/src/commands/cmd_server.cc b/src/commands/cmd_server.cc index 10cbc8cdc4e..0463cf8f08d 100644 --- a/src/commands/cmd_server.cc +++ b/src/commands/cmd_server.cc @@ -278,7 +278,7 @@ class CommandInfo : public Commander { } std::string info; srv->GetInfo(conn->GetNamespace(), section, &info); - *output = redis::BulkString(info); + *output = conn->VerbatimString("txt", info); return Status::OK(); } }; @@ -503,10 +503,10 @@ class CommandClient : public Commander { Status Execute(Server *srv, Connection *conn, std::string *output) override { if (subcommand_ == "list") { - *output = redis::BulkString(srv->GetClientsStr()); + 
*output = conn->VerbatimString("txt", srv->GetClientsStr()); return Status::OK(); } else if (subcommand_ == "info") { - *output = redis::BulkString(conn->ToString()); + *output = conn->VerbatimString("txt", conn->ToString()); return Status::OK(); } else if (subcommand_ == "setname") { conn->SetName(conn_name_); @@ -594,8 +594,16 @@ class CommandDebug : public Commander { } else if (subcommand_ == "protocol" && args.size() == 3) { protocol_type_ = util::ToLower(args[2]); return Status::OK(); + } else if (subcommand_ == "dbsize-limit" && args.size() == 3) { + auto val = ParseInt(args[2], {0, 1}, 10); + if (!val) { + return {Status::RedisParseErr, "invalid debug dbsize-limit value"}; + } + + dbsize_limit_ = static_cast(val); + return Status::OK(); } - return {Status::RedisInvalidCmd, "Syntax error, DEBUG SLEEP |PROTOCOL "}; + return {Status::RedisInvalidCmd, "Syntax error, DEBUG SLEEP |PROTOCOL |DBSIZE-LIMIT <0|1>"}; } Status Execute(Server *srv, Connection *conn, std::string *output) override { @@ -633,13 +641,25 @@ class CommandDebug : public Commander { *output = conn->Bool(false); } else if (protocol_type_ == "null") { *output = conn->NilString(); + } else if (protocol_type_ == "attrib") { + *output = conn->HeaderOfAttribute(1); + *output += redis::BulkString("key-popularity"); + *output += redis::Array({ + redis::BulkString("key:123"), + redis::Integer(90), + }); + } else if (protocol_type_ == "verbatim") { // verbatim string + *output = conn->VerbatimString("txt", "verbatim string"); } else { *output = redis::Error( "Wrong protocol type name. Please use one of the following: " - "string|integer|double|array|set|bignum|true|false|null"); + "string|integer|double|array|set|bignum|true|false|null|attrib|verbatim"); } + } else if (subcommand_ == "dbsize-limit") { + srv->storage->SetDBSizeLimit(dbsize_limit_); + *output = redis::SimpleString("OK"); } else { - return {Status::RedisInvalidCmd, "Unknown subcommand, should be DEBUG or PROTOCOL"}; + return {Status::RedisInvalidCmd, "Unknown subcommand, should be SLEEP, PROTOCOL or DBSIZE-LIMIT"}; } return Status::OK(); } @@ -648,6 +668,7 @@ class CommandDebug : public Commander { std::string subcommand_; std::string protocol_type_; uint64_t microsecond_ = 0; + bool dbsize_limit_ = false; }; class CommandCommand : public Commander { @@ -1318,8 +1339,8 @@ REDIS_REGISTER_COMMANDS(MakeCmdAttr("auth", 2, "read-only ok-loadin MakeCmdAttr("config", -2, "read-only", 0, 0, 0, GenerateConfigFlag), MakeCmdAttr("namespace", -3, "read-only exclusive", 0, 0, 0), MakeCmdAttr("keys", 2, "read-only", 0, 0, 0), - MakeCmdAttr("flushdb", 1, "write", 0, 0, 0), - MakeCmdAttr("flushall", 1, "write", 0, 0, 0), + MakeCmdAttr("flushdb", 1, "write no-dbsize-check", 0, 0, 0), + MakeCmdAttr("flushall", 1, "write no-dbsize-check", 0, 0, 0), MakeCmdAttr("dbsize", -1, "read-only", 0, 0, 0), MakeCmdAttr("slowlog", -2, "read-only", 0, 0, 0), MakeCmdAttr("perflog", -2, "read-only", 0, 0, 0), diff --git a/src/commands/cmd_set.cc b/src/commands/cmd_set.cc index ced252234b2..213a6768279 100644 --- a/src/commands/cmd_set.cc +++ b/src/commands/cmd_set.cc @@ -438,7 +438,7 @@ class CommandSScan : public CommandSubkeyScanBase { }; REDIS_REGISTER_COMMANDS(MakeCmdAttr("sadd", -3, "write", 1, 1, 1), - MakeCmdAttr("srem", -3, "write", 1, 1, 1), + MakeCmdAttr("srem", -3, "write no-dbsize-check", 1, 1, 1), MakeCmdAttr("scard", 2, "read-only", 1, 1, 1), MakeCmdAttr("smembers", 2, "read-only", 1, 1, 1), MakeCmdAttr("sismember", 3, "read-only", 1, 1, 1), diff --git a/src/commands/cmd_sortedint.cc 
b/src/commands/cmd_sortedint.cc index b668a0a69e6..a97e357f154 100644 --- a/src/commands/cmd_sortedint.cc +++ b/src/commands/cmd_sortedint.cc @@ -250,7 +250,7 @@ class CommandSortedintRevRangeByValue : public CommandSortedintRangeByValue { }; REDIS_REGISTER_COMMANDS(MakeCmdAttr("siadd", -3, "write", 1, 1, 1), - MakeCmdAttr("sirem", -3, "write", 1, 1, 1), + MakeCmdAttr("sirem", -3, "write no-dbsize-check", 1, 1, 1), MakeCmdAttr("sicard", 2, "read-only", 1, 1, 1), MakeCmdAttr("siexists", -3, "read-only", 1, 1, 1), MakeCmdAttr("sirange", -4, "read-only", 1, 1, 1), diff --git a/src/commands/cmd_stream.cc b/src/commands/cmd_stream.cc index 7ba408859c1..76e2146b7b0 100644 --- a/src/commands/cmd_stream.cc +++ b/src/commands/cmd_stream.cc @@ -1190,14 +1190,14 @@ class CommandXSetId : public Commander { }; REDIS_REGISTER_COMMANDS(MakeCmdAttr("xadd", -5, "write", 1, 1, 1), - MakeCmdAttr("xdel", -3, "write", 1, 1, 1), + MakeCmdAttr("xdel", -3, "write no-dbsize-check", 1, 1, 1), MakeCmdAttr("xgroup", -4, "write", 2, 2, 1), MakeCmdAttr("xlen", -2, "read-only", 1, 1, 1), MakeCmdAttr("xinfo", -2, "read-only", 0, 0, 0), MakeCmdAttr("xrange", -4, "read-only", 1, 1, 1), MakeCmdAttr("xrevrange", -2, "read-only", 1, 1, 1), MakeCmdAttr("xread", -4, "read-only", 0, 0, 0), - MakeCmdAttr("xtrim", -4, "write", 1, 1, 1), + MakeCmdAttr("xtrim", -4, "write no-dbsize-check", 1, 1, 1), MakeCmdAttr("xsetid", -3, "write", 1, 1, 1)) } // namespace redis diff --git a/src/commands/cmd_string.cc b/src/commands/cmd_string.cc index debf5e3fb7b..3f7b5090b8c 100644 --- a/src/commands/cmd_string.cc +++ b/src/commands/cmd_string.cc @@ -626,7 +626,7 @@ REDIS_REGISTER_COMMANDS( MakeCmdAttr("getset", 3, "write", 1, 1, 1), MakeCmdAttr("getrange", 4, "read-only", 1, 1, 1), MakeCmdAttr("substr", 4, "read-only", 1, 1, 1), - MakeCmdAttr("getdel", 2, "write", 1, 1, 1), + MakeCmdAttr("getdel", 2, "write no-dbsize-check", 1, 1, 1), MakeCmdAttr("setrange", 4, "write", 1, 1, 1), MakeCmdAttr("mget", -2, "read-only", 1, -1, 1), MakeCmdAttr("append", 3, "write", 1, 1, 1), MakeCmdAttr("set", -3, "write", 1, 1, 1), diff --git a/src/commands/cmd_zset.cc b/src/commands/cmd_zset.cc index d78fe54eed8..136b84eb5e2 100644 --- a/src/commands/cmd_zset.cc +++ b/src/commands/cmd_zset.cc @@ -1537,10 +1537,10 @@ REDIS_REGISTER_COMMANDS(MakeCmdAttr("zadd", -4, "write", 1, 1, 1), MakeCmdAttr("zrevrangebylex", -4, "read-only", 1, 1, 1), MakeCmdAttr("zrangebyscore", -4, "read-only", 1, 1, 1), MakeCmdAttr("zrank", -3, "read-only", 1, 1, 1), - MakeCmdAttr("zrem", -3, "write", 1, 1, 1), - MakeCmdAttr("zremrangebyrank", 4, "write", 1, 1, 1), - MakeCmdAttr("zremrangebyscore", 4, "write", 1, 1, 1), - MakeCmdAttr("zremrangebylex", 4, "write", 1, 1, 1), + MakeCmdAttr("zrem", -3, "write no-dbsize-check", 1, 1, 1), + MakeCmdAttr("zremrangebyrank", 4, "write no-dbsize-check", 1, 1, 1), + MakeCmdAttr("zremrangebyscore", 4, "write no-dbsize-check", 1, 1, 1), + MakeCmdAttr("zremrangebylex", 4, "write no-dbsize-check", 1, 1, 1), MakeCmdAttr("zrevrangebyscore", -4, "read-only", 1, 1, 1), MakeCmdAttr("zrevrank", -3, "read-only", 1, 1, 1), MakeCmdAttr("zscore", 3, "read-only", 1, 1, 1), diff --git a/src/commands/commander.h b/src/commands/commander.h index 3330337c746..d759bd2047e 100644 --- a/src/commands/commander.h +++ b/src/commands/commander.h @@ -52,18 +52,19 @@ class Connection; struct CommandAttributes; enum CommandFlags : uint64_t { - kCmdWrite = 1ULL << 0, // "write" flag - kCmdReadOnly = 1ULL << 1, // "read-only" flag - kCmdReplication = 1ULL << 2, // "replication" flag 
- kCmdPubSub = 1ULL << 3, // "pub-sub" flag - kCmdScript = 1ULL << 4, // "script" flag - kCmdLoading = 1ULL << 5, // "ok-loading" flag - kCmdMulti = 1ULL << 6, // "multi" flag - kCmdExclusive = 1ULL << 7, // "exclusive" flag - kCmdNoMulti = 1ULL << 8, // "no-multi" flag - kCmdNoScript = 1ULL << 9, // "no-script" flag - kCmdROScript = 1ULL << 10, // "ro-script" flag for read-only script commands - kCmdCluster = 1ULL << 11, // "cluster" flag + kCmdWrite = 1ULL << 0, // "write" flag + kCmdReadOnly = 1ULL << 1, // "read-only" flag + kCmdReplication = 1ULL << 2, // "replication" flag + kCmdPubSub = 1ULL << 3, // "pub-sub" flag + kCmdScript = 1ULL << 4, // "script" flag + kCmdLoading = 1ULL << 5, // "ok-loading" flag + kCmdMulti = 1ULL << 6, // "multi" flag + kCmdExclusive = 1ULL << 7, // "exclusive" flag + kCmdNoMulti = 1ULL << 8, // "no-multi" flag + kCmdNoScript = 1ULL << 9, // "no-script" flag + kCmdROScript = 1ULL << 10, // "ro-script" flag for read-only script commands + kCmdCluster = 1ULL << 11, // "cluster" flag + kCmdNoDBSizeCheck = 1ULL << 12, // "no-dbsize-check" flag }; class Commander { @@ -148,6 +149,10 @@ struct CommandAttributes { if (flag_gen) res |= flag_gen(args); return res; } + + bool CheckArity(int cmd_size) const { + return !((arity > 0 && cmd_size != arity) || (arity < 0 && cmd_size < -arity)); + } }; using CommandMap = std::map<std::string, CommandAttributes>; @@ -178,6 +183,8 @@ inline uint64_t ParseCommandFlags(const std::string &description, const std::str flags |= kCmdROScript; else if (flag == "cluster") flags |= kCmdCluster; + else if (flag == "no-dbsize-check") + flags |= kCmdNoDBSizeCheck; else { std::cout << fmt::format("Encountered non-existent flag '{}' in command {} in command attribute parsing", flag, cmd_name) diff --git a/src/common/bit_util.h b/src/common/bit_util.h new file mode 100644 index 00000000000..9c23d78bb8d --- /dev/null +++ b/src/common/bit_util.h @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +namespace util { + +/* Count number of bits set in the binary array pointed by 's' and long + * 'count' bytes. The implementation of this function is required to + * work with an input string length up to 512 MB.
+ * */ +inline size_t RawPopcount(const uint8_t *p, int64_t count) { + size_t bits = 0; + + for (; count >= 8; p += 8, count -= 8) { + bits += __builtin_popcountll(*reinterpret_cast<const uint64_t *>(p)); + } + + if (count > 0) { + uint64_t v = 0; + __builtin_memcpy(&v, p, count); + bits += __builtin_popcountll(v); + } + + return bits; +} + +template <typename T = void> +inline int ClzllWithEndian(uint64_t x) { + if constexpr (IsLittleEndian()) { + return __builtin_clzll(__builtin_bswap64(x)); + } else if constexpr (IsBigEndian()) { + return __builtin_clzll(x); + } else { + static_assert(AlwaysFalse<T>); + } +} + +// Return the number of bytes needed to fit the given number of bits +constexpr int64_t BytesForBits(int64_t bits) { + // This formula avoids integer overflow on very large `bits` + return (bits >> 3) + ((bits & 7) != 0); +} + +namespace lsb { +static constexpr bool GetBit(const uint8_t *bits, uint64_t i) { return (bits[i >> 3] >> (i & 0x07)) & 1; } + +// Bitmask selecting the k-th bit in a byte +static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; + +// Gets the i-th bit from a byte. Should only be used with i <= 7. +static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) { return byte & kBitmask[i]; } + +static inline void SetBitTo(uint8_t *bits, int64_t i, bool bit_is_set) { + // https://graphics.stanford.edu/~seander/bithacks.html + // "Conditionally set or clear bits without branching" + // NOTE: this seems to confuse Valgrind as it reads from potentially + // uninitialized memory + bits[i / 8] ^= static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set) ^ bits[i / 8]) & kBitmask[i % 8]; +} +} // namespace lsb + +namespace msb { +static constexpr bool GetBit(const uint8_t *bits, uint64_t i) { return (bits[i >> 3] >> (7 - (i & 0x07))) & 1; } + +// Bitmask selecting the k-th bit in a byte +static constexpr uint8_t kBitmask[] = {128, 64, 32, 16, 8, 4, 2, 1}; + +// Gets the i-th bit from a byte. Should only be used with i <= 7. +static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) { return byte & kBitmask[i]; } + +static inline void SetBitTo(uint8_t *bits, int64_t i, bool bit_is_set) { + // https://graphics.stanford.edu/~seander/bithacks.html + // "Conditionally set or clear bits without branching" + // NOTE: this seems to confuse Valgrind as it reads from potentially + // uninitialized memory + bits[i / 8] ^= static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set) ^ bits[i / 8]) & kBitmask[i % 8]; +} + +/* Return the position of the first bit set to one (if 'bit' is 1) or + * zero (if 'bit' is 0) in the bitmap starting at 's' and long 'count' bytes. + * + * The function is guaranteed to return a value >= 0 if 'bit' is 0 since if + * no zero bit is found, it returns count*8 assuming the string is zero + * padded on the right. However if 'bit' is 1 it is possible that there is + * not a single set bit in the bitmap. In this special case -1 is returned. + * */ +inline int64_t RawBitpos(const uint8_t *c, int64_t count, bool bit) { + int64_t res = 0; + + if (bit) { + int64_t ct = count; + + for (; count >= 8; c += 8, count -= 8) { + uint64_t x = *reinterpret_cast<const uint64_t *>(c); + if (x != 0) { + return res + ClzllWithEndian(x); + } + res += 64; + } + + if (count > 0) { + uint64_t v = 0; + __builtin_memcpy(&v, c, count); + res += v == 0 ?
count * 8 : ClzllWithEndian(v); + } + + if (res == ct * 8) { + return -1; + } + } else { + for (; count >= 8; c += 8, count -= 8) { + uint64_t x = *reinterpret_cast<const uint64_t *>(c); + if (x != (uint64_t)-1) { + return res + ClzllWithEndian(~x); + } + res += 64; + } + + if (count > 0) { + uint64_t v = -1; + __builtin_memcpy(&v, c, count); + res += v == (uint64_t)-1 ? count * 8 : ClzllWithEndian(~v); + } + } + + return res; +} + +} // namespace msb + +} // namespace util diff --git a/src/common/status.h b/src/common/status.h index 37eae9d8281..ade19f86d0e 100644 --- a/src/common/status.h +++ b/src/common/status.h @@ -66,6 +66,9 @@ class [[nodiscard]] Status { // Blocking BlockingCmd, + + // Search + NoPrefixMatched, }; Status() : impl_{nullptr} {} diff --git a/src/config/config.cc b/src/config/config.cc index 7a944f6c4ad..db7ee712a99 100644 --- a/src/config/config.cc +++ b/src/config/config.cc @@ -83,6 +83,9 @@ const std::vector<ConfigEnum<BlockCacheType>> cache_types{[] { return res; }()}; +const std::vector<ConfigEnum<MigrationType>> migration_types{{"redis-command", MigrationType::kRedisCommand}, + {"raw-key-value", MigrationType::kRawKeyValue}}; + std::string TrimRocksDbPrefix(std::string s) { if (strncasecmp(s.data(), "rocksdb.", 8) != 0) return s; return s.substr(8, s.size() - 8); @@ -168,6 +171,10 @@ Config::Config() { {"migrate-speed", false, new IntField(&migrate_speed, 4096, 0, INT_MAX)}, {"migrate-pipeline-size", false, new IntField(&pipeline_size, 16, 1, INT_MAX)}, {"migrate-sequence-gap", false, new IntField(&sequence_gap, 10000, 1, INT_MAX)}, + {"migrate-type", false, + new EnumField<MigrationType>(&migrate_type, migration_types, MigrationType::kRedisCommand)}, + {"migrate-batch-size-kb", false, new IntField(&migrate_batch_size_kb, 16, 1, INT_MAX)}, + {"migrate-batch-rate-limit-mb", false, new IntField(&migrate_batch_rate_limit_mb, 16, 0, INT_MAX)}, {"unixsocket", true, new StringField(&unixsocket, "")}, {"unixsocketperm", true, new OctalField(&unixsocketperm, 0777, 1, INT_MAX)}, {"log-retention-days", false, new IntField(&log_retention_days, -1, -1, INT_MAX)}, @@ -508,6 +515,18 @@ void Config::initFieldCallback() { if (cluster_enabled) srv->slot_migrator->SetSequenceGapLimit(sequence_gap); return Status::OK(); }}, + {"migrate-batch-rate-limit-mb", + [this](Server *srv, const std::string &k, const std::string &v) -> Status { + if (!srv) return Status::OK(); + srv->slot_migrator->SetMigrateBatchRateLimit(migrate_batch_rate_limit_mb * MiB); + return Status::OK(); + }}, + {"migrate-batch-size-kb", + [this](Server *srv, const std::string &k, const std::string &v) -> Status { + if (!srv) return Status::OK(); + srv->slot_migrator->SetMigrateBatchSize(migrate_batch_size_kb * KiB); + return Status::OK(); + }}, {"log-level", [this](Server *srv, const std::string &k, const std::string &v) -> Status { if (!srv) return Status::OK(); diff --git a/src/config/config.h b/src/config/config.h index e69ad5e580f..a24c59e03fe 100644 --- a/src/config/config.h +++ b/src/config/config.h @@ -36,6 +36,7 @@ // forward declaration class Server; +enum class MigrationType; namespace engine { class Storage; } @@ -144,9 +145,13 @@ struct Config { bool persist_cluster_nodes_enabled = true; bool slot_id_encoded = false; bool cluster_enabled = false; + int migrate_speed; int pipeline_size; int sequence_gap; + MigrationType migrate_type; + int migrate_batch_size_kb; + int migrate_batch_rate_limit_mb; bool redis_cursor_compatible = false; bool resp3_enabled = false; diff --git a/src/config/config_type.h b/src/config/config_type.h index e2918392d4b..d84ce1cb423 100644 ---
diff --git a/src/config/config_type.h b/src/config/config_type.h
index e2918392d4b..d84ce1cb423 100644
--- a/src/config/config_type.h
+++ b/src/config/config_type.h
@@ -137,7 +137,7 @@ class IntegerField : public ConfigField {
   }
   Status Set(const std::string &v) override {
     auto s = ParseInt<IntegerType>(v, {min_, max_});
-    if (!s.IsOK()) return s;
+    if (!s.IsOK()) return std::move(s);
     *receiver_ = s.GetValue();
     return Status::OK();
   }
@@ -164,7 +164,7 @@ class OctalField : public ConfigField {
   }
   Status Set(const std::string &v) override {
     auto s = ParseInt<int>(v, {min_, max_}, 8);
-    if (!s.IsOK()) return s;
+    if (!s.IsOK()) return std::move(s);
     *receiver_ = *s;
     return Status::OK();
   }
diff --git a/src/search/indexer.cc b/src/search/indexer.cc
new file mode 100644
index 00000000000..f608d3df6dc
--- /dev/null
+++ b/src/search/indexer.cc
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "indexer.h"
+
+#include <variant>
+
+#include "storage/redis_metadata.h"
+#include "types/redis_hash.h"
+
+namespace redis {
+
+StatusOr<FieldValueRetriever> FieldValueRetriever::Create(SearchOnDataType type, std::string_view key,
+                                                          engine::Storage *storage, const std::string &ns) {
+  if (type == SearchOnDataType::HASH) {
+    Hash db(storage, ns);
+    std::string ns_key = db.AppendNamespacePrefix(key);
+    HashMetadata metadata(false);
+    auto s = db.GetMetadata(ns_key, &metadata);
+    if (!s.ok()) return {Status::NotOK, s.ToString()};
+    return FieldValueRetriever(db, metadata, key);
+  } else if (type == SearchOnDataType::JSON) {
+    Json db(storage, ns);
+    std::string ns_key = db.AppendNamespacePrefix(key);
+    JsonMetadata metadata(false);
+    JsonValue value;
+    auto s = db.read(ns_key, &metadata, &value);
+    if (!s.ok()) return {Status::NotOK, s.ToString()};
+    return FieldValueRetriever(value);
+  } else {
+    assert(false && "unreachable code: unexpected SearchOnDataType");
+    __builtin_unreachable();
+  }
+}
+
+rocksdb::Status FieldValueRetriever::Retrieve(std::string_view field, std::string *output) {
+  if (std::holds_alternative<HashData>(db)) {
+    auto &[hash, metadata, key] = std::get<HashData>(db);
+    std::string ns_key = hash.AppendNamespacePrefix(key);
+    LatestSnapShot ss(hash.storage_);
+    rocksdb::ReadOptions read_options;
+    read_options.snapshot = ss.GetSnapShot();
+    std::string sub_key = InternalKey(ns_key, field, metadata.version, hash.storage_->IsSlotIdEncoded()).Encode();
+    return hash.storage_->Get(read_options, sub_key, output);
+  } else if (std::holds_alternative<JsonData>(db)) {
+    auto &value = std::get<JsonData>(db);
+    auto s = value.Get(field);
+    if (!s.IsOK()) return rocksdb::Status::Corruption(s.Msg());
+    if (s->value.size() != 1)
+      return rocksdb::Status::NotFound("json value specified by the field (json path) should exist and be unique");
+    *output = s->value[0].as_string();
+    return rocksdb::Status::OK();
+  } else {
+    __builtin_unreachable();
+  }
+}
+
+StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, const std::string &ns) {
+  Database db(indexer->storage, ns);
+
+  RedisType type = kRedisNone;
+  auto s = db.Type(key, &type);
+  if (!s.ok()) return {Status::NotOK, s.ToString()};
+
+  if (type != static_cast<RedisType>(on_data_type)) {
+    // not the expected type, stop recording
+    return {Status::NotOK, "this data type cannot be indexed"};
+  }
+
+  auto retriever = GET_OR_RET(FieldValueRetriever::Create(on_data_type, key, indexer->storage, ns));
+
+  FieldValues values;
+  for (const auto &[field, info] : fields) {
+    std::string value;
+    auto s = retriever.Retrieve(field, &value);
+    if (s.IsNotFound()) continue;
+    if (!s.ok()) return {Status::NotOK, s.ToString()};
+
+    values.emplace(field, value);
+  }
+
+  return values;
+}
+
+void GlobalIndexer::Add(IndexUpdater updater) {
+  auto &up = updaters.emplace_back(std::move(updater));
+  for (const auto &prefix : up.prefixes) {
+    prefix_map.emplace(prefix, &up);
+  }
+}
+
+StatusOr<IndexUpdater::FieldValues> GlobalIndexer::Record(std::string_view key, const std::string &ns) {
+  auto iter = prefix_map.longest_prefix(key);
+  if (iter != prefix_map.end()) {
+    return iter.value()->Record(key, ns);
+  }
+
+  return {Status::NoPrefixMatched};
+}
+
+}  // namespace redis
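`GlobalIndexer::Record` above dispatches a key to the updater registered for the longest matching key prefix, using `tsl::htrie_map::longest_prefix`. The standalone sketch below reproduces just that dispatch semantics with a plain `std::map` and a linear scan (the hat-trie answers the same query without scanning all prefixes); names here are illustrative only:

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <string>

// Toy stand-in for GlobalIndexer::Record's dispatch: return the owner of the
// longest registered prefix that is a prefix of `key`, or nothing at all.
std::optional<std::string> LongestPrefixOwner(const std::map<std::string, std::string> &prefix_map,
                                              const std::string &key) {
  size_t best_len = 0;
  std::optional<std::string> best;
  for (const auto &[prefix, owner] : prefix_map) {
    if (key.compare(0, prefix.size(), prefix) == 0 && prefix.size() >= best_len) {
      best_len = prefix.size();
      best = owner;  // owner of the longest matching prefix so far
    }
  }
  return best;
}

int main() {
  std::map<std::string, std::string> prefix_map{{"doc:", "doc-index"}, {"doc:en:", "doc-en-index"}};
  std::cout << LongestPrefixOwner(prefix_map, "doc:en:42").value_or("<none>") << "\n";  // doc-en-index
  std::cout << LongestPrefixOwner(prefix_map, "user:1").value_or("<none>") << "\n";     // <none>, i.e. NoPrefixMatched
  return 0;
}
```

The "no match" case corresponds to the new `Status::NoPrefixMatched` code added to `status.h` earlier in this patch.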
diff --git a/src/search/indexer.h b/src/search/indexer.h
new file mode 100644
index 00000000000..e153d55584c
--- /dev/null
+++ b/src/search/indexer.h
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#pragma once
+
+#include <tsl/htrie_map.h>
+
+#include <deque>
+#include <map>
+#include <string>
+#include <variant>
+
+#include "commands/commander.h"
+#include "config/config.h"
+#include "indexer.h"
+#include "search/search_encoding.h"
+#include "server/server.h"
+#include "storage/redis_metadata.h"
+#include "storage/storage.h"
+#include "types/redis_hash.h"
+#include "types/redis_json.h"
+
+namespace redis {
+
+struct GlobalIndexer;
+
+struct FieldValueRetriever {
+  struct HashData {
+    Hash hash;
+    HashMetadata metadata;
+    std::string_view key;
+
+    HashData(Hash hash, HashMetadata metadata, std::string_view key)
+        : hash(std::move(hash)), metadata(std::move(metadata)), key(key) {}
+  };
+  using JsonData = JsonValue;
+
+  using Variant = std::variant<HashData, JsonData>;
+  Variant db;
+
+  static StatusOr<FieldValueRetriever> Create(SearchOnDataType type, std::string_view key, engine::Storage *storage,
+                                              const std::string &ns);
+
+  explicit FieldValueRetriever(Hash hash, HashMetadata metadata, std::string_view key)
+      : db(std::in_place_type<HashData>, std::move(hash), std::move(metadata), key) {}
+
+  explicit FieldValueRetriever(JsonValue json) : db(std::in_place_type<JsonData>, std::move(json)) {}
+
+  rocksdb::Status Retrieve(std::string_view field, std::string *output);
+};
+
+struct IndexUpdater {
+  using FieldValues = std::map<std::string, std::string>;
+
+  SearchOnDataType on_data_type;
+  std::vector<std::string> prefixes;
+  std::map<std::string, std::unique_ptr<SearchFieldMetadata>> fields;
+  GlobalIndexer *indexer = nullptr;
+
+  StatusOr<FieldValues> Record(std::string_view key, const std::string &ns);
+};
+
+struct GlobalIndexer {
+  std::deque<IndexUpdater> updaters;
+  tsl::htrie_map<char, IndexUpdater *> prefix_map;
+
+  engine::Storage *storage = nullptr;
+
+  explicit GlobalIndexer(engine::Storage *storage) : storage(storage) {}
+
+  void Add(IndexUpdater updater);
+  StatusOr<IndexUpdater::FieldValues> Record(std::string_view key, const std::string &ns);
+};
+
+}  // namespace redis
diff --git a/src/search/search_encoding.h b/src/search/search_encoding.h
new file mode 100644
index 00000000000..2acec050dde
--- /dev/null
+++ b/src/search/search_encoding.h
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace redis {
+
+inline constexpr auto kErrorInsufficientLength = "insufficient length while decoding metadata";
+
+enum class SearchSubkeyType : uint8_t {
+  // search global metadata
+  PREFIXES = 1,
+
+  // field metadata for different types
+  TAG_FIELD_META = 64 + 1,
+  NUMERIC_FIELD_META = 64 + 2,
+
+  // field indexing for different types
+  TAG_FIELD = 128 + 1,
+  NUMERIC_FIELD = 128 + 2,
+};
+
+inline std::string ConstructSearchPrefixesSubkey() { return {(char)SearchSubkeyType::PREFIXES}; }
+
+struct SearchPrefixesMetadata {
+  std::vector<std::string> prefixes;
+
+  void Encode(std::string *dst) const {
+    for (const auto &prefix : prefixes) {
+      PutFixed32(dst, prefix.size());
+      dst->append(prefix);
+    }
+  }
+
+  rocksdb::Status Decode(Slice *input) {
+    uint32_t size = 0;
+
+    while (GetFixed32(input, &size)) {
+      if (input->size() < size) return rocksdb::Status::Corruption(kErrorInsufficientLength);
+      prefixes.emplace_back(input->data(), size);
+      input->remove_prefix(size);
+    }
+
+    return rocksdb::Status::OK();
+  }
+};
+
+struct SearchFieldMetadata {
+  bool noindex = false;
+
+  // flag: <noindex: 1 bit>
+  uint8_t MakeFlag() const { return noindex; }
+
+  void DecodeFlag(uint8_t flag) { noindex = flag & 1; }
+
+  virtual void Encode(std::string *dst) const { PutFixed8(dst, MakeFlag()); }
+
+  virtual rocksdb::Status Decode(Slice *input) {
+    uint8_t flag = 0;
+    if (!GetFixed8(input, &flag)) {
+      return rocksdb::Status::Corruption(kErrorInsufficientLength);
+    }
+
+    DecodeFlag(flag);
+    return rocksdb::Status::OK();
+  }
+};
+
+inline std::string ConstructTagFieldMetadataSubkey(std::string_view field_name) {
+  std::string res = {(char)SearchSubkeyType::TAG_FIELD_META};
+  res.append(field_name);
+  return res;
+}
+
+struct SearchTagFieldMetadata : SearchFieldMetadata {
+  char separator = ',';
+  bool case_sensitive = false;
+
+  void Encode(std::string *dst) const override {
+    SearchFieldMetadata::Encode(dst);
+    PutFixed8(dst, separator);
+    PutFixed8(dst, case_sensitive);
+  }
+
+  rocksdb::Status Decode(Slice *input) override {
+    if (auto s = SearchFieldMetadata::Decode(input); !s.ok()) {
+      return s;
+    }
+
+    if (input->size() < 8 + 8) {
+      return rocksdb::Status::Corruption(kErrorInsufficientLength);
+    }
+
+    GetFixed8(input, (uint8_t *)&separator);
+    GetFixed8(input, (uint8_t *)&case_sensitive);
+    return rocksdb::Status::OK();
+  }
+};
+
+inline std::string ConstructNumericFieldMetadataSubkey(std::string_view field_name) {
+  std::string res = {(char)SearchSubkeyType::NUMERIC_FIELD_META};
+  res.append(field_name);
+  return res;
+}
+
+struct SearchNumericFieldMetadata : SearchFieldMetadata {};
+
+inline std::string ConstructTagFieldSubkey(std::string_view field_name, std::string_view tag, std::string_view key) {
+  std::string res = {(char)SearchSubkeyType::TAG_FIELD};
+  PutFixed32(&res, field_name.size());
+  res.append(field_name);
+  PutFixed32(&res, tag.size());
+  res.append(tag);
+  PutFixed32(&res, key.size());
+  res.append(key);
+  return res;
+}
+
+inline std::string ConstructNumericFieldSubkey(std::string_view field_name, double number, std::string_view key) {
+  std::string res = {(char)SearchSubkeyType::NUMERIC_FIELD};
+  PutFixed32(&res, field_name.size());
+  res.append(field_name);
+  PutDouble(&res, number);
+  PutFixed32(&res, key.size());
+  res.append(key);
+  return res;
+}
+
+}  // namespace redis
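The subkey constructors above all follow the same layout: a one-byte subkey type tag followed by length-prefixed components. Below is a rough standalone sketch of `ConstructTagFieldSubkey`'s layout; `PutFixed32LE` is a hypothetical stand-in for the project's fixed-width encoder, assuming little-endian output (the real helper's byte order may differ):

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <string_view>

// Hypothetical stand-in for a fixed-width 32-bit encoder (assumes little-endian host).
void PutFixed32LE(std::string *dst, uint32_t v) {
  char buf[4];
  std::memcpy(buf, &v, sizeof(v));
  dst->append(buf, sizeof(buf));
}

// Mirrors ConstructTagFieldSubkey's layout: type byte, then length-prefixed
// field name, tag and key, so subkeys group by field first, then by tag.
std::string TagFieldSubkey(std::string_view field, std::string_view tag, std::string_view key) {
  std::string res = {static_cast<char>(128 + 1)};  // SearchSubkeyType::TAG_FIELD
  PutFixed32LE(&res, field.size());
  res.append(field);
  PutFixed32LE(&res, tag.size());
  res.append(tag);
  PutFixed32LE(&res, key.size());
  res.append(key);
  return res;
}

int main() {
  std::string subkey = TagFieldSubkey("color", "blue", "doc:1");
  std::cout << "subkey bytes: " << subkey.size() << "\n";  // 1 + 3*4 + 5 + 4 + 5 = 27
  return 0;
}
```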
diff --git a/src/server/redis_connection.cc b/src/server/redis_connection.cc
index 99d579fe136..19bcbcebb2d 100644
--- a/src/server/redis_connection.cc
+++ b/src/server/redis_connection.cc
@@ -391,6 +391,23 @@ void Connection::RecordProfilingSampleIfNeed(const std::string &cmd, uint64_t du
   srv_->GetPerfLog()->PushEntry(std::move(entry));
 }
 
+Status Connection::ExecuteCommand(const std::string &cmd_name, const std::vector<std::string> &cmd_tokens,
+                                  Commander *current_cmd, std::string *reply) {
+  srv_->stats.IncrCalls(cmd_name);
+
+  auto start = std::chrono::high_resolution_clock::now();
+  bool is_profiling = IsProfilingEnabled(cmd_name);
+  auto s = current_cmd->Execute(srv_, this, reply);
+  auto end = std::chrono::high_resolution_clock::now();
+  uint64_t duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
+  if (is_profiling) RecordProfilingSampleIfNeed(cmd_name, duration);
+
+  srv_->SlowlogPushEntryIfNeeded(&cmd_tokens, duration, this);
+  srv_->stats.IncrLatency(static_cast<int64_t>(duration), cmd_name);
+  srv_->FeedMonitorConns(this, cmd_tokens);
+  return s;
+}
+
 void Connection::ExecuteCommands(std::deque<CommandTokens> *to_process_cmds) {
   Config *config = srv_->GetConfig();
   std::string reply, password = config->requirepass;
@@ -403,38 +420,37 @@ void Connection::ExecuteCommands(std::deque<CommandTokens> *to_process_cmds) {
     bool is_multi_exec = IsFlagEnabled(Connection::kMultiExec);
     if (IsFlagEnabled(redis::Connection::kCloseAfterReply) && !is_multi_exec) break;
 
-    std::unique_ptr<Commander> current_cmd;
-    auto s = srv_->LookupAndCreateCommand(cmd_tokens.front(), &current_cmd);
-    if (!s.IsOK()) {
+    auto cmd_s = srv_->LookupAndCreateCommand(cmd_tokens.front());
+    if (!cmd_s.IsOK()) {
       if (is_multi_exec) multi_error_ = true;
       Reply(redis::Error("ERR unknown command " + cmd_tokens.front()));
       continue;
     }
+    auto current_cmd = std::move(*cmd_s);
 
-    if (GetNamespace().empty()) {
-      if (!password.empty() && util::ToLower(cmd_tokens.front()) != "auth" &&
-          util::ToLower(cmd_tokens.front()) != "hello") {
-        Reply(redis::Error("NOAUTH Authentication required."));
-        continue;
-      }
+    const auto attributes = current_cmd->GetAttributes();
+    auto cmd_name = attributes->name;
+    auto cmd_flags = attributes->GenerateFlags(cmd_tokens);
 
-      if (password.empty()) {
+    if (GetNamespace().empty()) {
+      if (!password.empty()) {
+        if (cmd_name != "auth" && cmd_name != "hello") {
+          Reply(redis::Error("NOAUTH Authentication required."));
+          continue;
+        }
+      } else {
         BecomeAdmin();
         SetNamespace(kDefaultNamespace);
       }
     }
 
-    const auto attributes = current_cmd->GetAttributes();
-    auto cmd_name = attributes->name;
-    auto cmd_flags = attributes->GenerateFlags(cmd_tokens);
-
     std::shared_lock<std::shared_mutex> concurrency;  // Allow concurrency
     std::unique_lock<std::shared_mutex> exclusivity;  // Need exclusivity
     // If the command needs to process exclusively, we need to get 'ExclusivityGuard'
     // that can guarantee other threads can't come into critical zone, such as DEBUG,
     // CLUSTER subcommand, CONFIG SET, MULTI, LUA (in the immediate future).
     // Otherwise, we just use 'ConcurrencyGuard' to allow all workers to execute commands at the same time.
-    if (is_multi_exec && attributes->name != "exec") {
+    if (is_multi_exec && cmd_name != "exec") {
       // No lock guard, because 'exec' command has acquired 'WorkExclusivityGuard'
     } else if (cmd_flags & kCmdExclusive) {
       exclusivity = srv_->WorkExclusivityGuard();
@@ -457,16 +473,15 @@ void Connection::ExecuteCommands(std::deque<CommandTokens> *to_process_cmds) {
       continue;
     }
 
-    int arity = attributes->arity;
     int tokens = static_cast<int>(cmd_tokens.size());
-    if ((arity > 0 && tokens != arity) || (arity < 0 && tokens < -arity)) {
+    if (!attributes->CheckArity(tokens)) {
      if (is_multi_exec) multi_error_ = true;
       Reply(redis::Error("ERR wrong number of arguments"));
       continue;
     }
 
     current_cmd->SetArgs(cmd_tokens);
-    s = current_cmd->Parse();
+    auto s = current_cmd->Parse();
     if (!s.IsOK()) {
       if (is_multi_exec) multi_error_ = true;
       Reply(redis::Error("ERR " + s.Msg()));
@@ -474,8 +489,7 @@ void Connection::ExecuteCommands(std::deque<CommandTokens> *to_process_cmds) {
     }
 
     if (is_multi_exec && (cmd_flags & kCmdNoMulti)) {
-      std::string no_multi_err = "ERR Can't execute " + attributes->name + " in MULTI";
-      Reply(redis::Error(no_multi_err));
+      Reply(redis::Error("ERR Can't execute " + cmd_name + " in MULTI"));
       multi_error_ = true;
       continue;
     }
@@ -501,6 +515,11 @@ void Connection::ExecuteCommands(std::deque<CommandTokens> *to_process_cmds) {
       continue;
     }
 
+    if ((cmd_flags & kCmdWrite) && !(cmd_flags & kCmdNoDBSizeCheck) && srv_->storage->ReachedDBSizeLimit()) {
+      Reply(redis::Error("ERR write command not allowed when reached max-db-size."));
+      continue;
+    }
+
     if (!config->slave_serve_stale_data && srv_->IsSlave() && cmd_name != "info" && cmd_name != "slaveof" &&
         srv_->GetReplicationState() != kReplConnected) {
       Reply(
@@ -510,18 +529,7 @@ void Connection::ExecuteCommands(std::deque<CommandTokens> *to_process_cmds) {
     }
 
     SetLastCmd(cmd_name);
-    srv_->stats.IncrCalls(cmd_name);
-
-    auto start = std::chrono::high_resolution_clock::now();
-    bool is_profiling = IsProfilingEnabled(cmd_name);
-    s = current_cmd->Execute(srv_, this, &reply);
-    auto end = std::chrono::high_resolution_clock::now();
-    uint64_t duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
-    if (is_profiling) RecordProfilingSampleIfNeed(cmd_name, duration);
-
-    srv_->SlowlogPushEntryIfNeeded(&cmd_tokens, duration, this);
-    srv_->stats.IncrLatency(static_cast<int64_t>(duration), cmd_name);
-    srv_->FeedMonitorConns(this, cmd_tokens);
+    s = ExecuteCommand(cmd_name, cmd_tokens, current_cmd.get(), &reply);
 
     // Break the execution loop when occurring the blocking command like BLPOP or BRPOP,
     // it will suspend the connection and wait for the wakeup signal.
diff --git a/src/server/redis_connection.h b/src/server/redis_connection.h
index e51f66ab72e..9faf0c2700a 100644
--- a/src/server/redis_connection.h
+++ b/src/server/redis_connection.h
@@ -71,6 +71,17 @@ class Connection : public EvbufCallbackBase<Connection> {
   std::string Double(double d) const {
     return protocol_version_ == RESP::v3 ? "," + util::Float2String(d) + CRLF : BulkString(util::Float2String(d));
   }
+  // ext is the extension of the file to send: 'txt' for a text file, 'md ' for a markdown file;
+  // at most 3 chars, padded with spaces.
+  // If RESP is V2, treat the verbatim string as a blob string.
+  // https://github.com/redis/redis/blob/7.2/src/networking.c#L1099
+  std::string VerbatimString(std::string ext, const std::string &data) const {
+    CHECK(ext.size() <= 3);
+    size_t padded_len = 3 - ext.size();
+    ext = ext + std::string(padded_len, ' ');
+    return protocol_version_ == RESP::v3 ? "=" + std::to_string(3 + 1 + data.size()) + CRLF + ext + ":" + data + CRLF
+                                         : BulkString(data);
+  }
   std::string NilString() const { return redis::NilString(protocol_version_); }
   std::string NilArray() const { return protocol_version_ == RESP::v3 ? "_" CRLF : "*-1" CRLF; }
   std::string MultiBulkString(const std::vector<std::string> &values) const;
@@ -86,6 +97,10 @@ class Connection : public EvbufCallbackBase<Connection> {
     return protocol_version_ == RESP::v3 ? "%" + std::to_string(len) + CRLF : MultiLen(len * 2);
   }
   std::string MapOfBulkStrings(const std::vector<std::string> &elems) const;
+  template <typename T, std::enable_if_t<std::is_integral_v<T>, int> = 0>
+  std::string HeaderOfAttribute(T len) const {
+    return "|" + std::to_string(len) + CRLF;
+  }
 
   using UnsubscribeCallback = std::function<void(const std::string &, int)>;
   void SubscribeChannel(const std::string &channel);
@@ -144,6 +159,8 @@ class Connection : public EvbufCallbackBase<Connection> {
   evbuffer *Output() { return bufferevent_get_output(bev_); }
   bufferevent *GetBufferEvent() { return bev_; }
   void ExecuteCommands(std::deque<CommandTokens> *to_process_cmds);
+  Status ExecuteCommand(const std::string &cmd_name, const std::vector<std::string> &cmd_tokens,
+                        Commander *current_cmd, std::string *reply);
   bool IsProfilingEnabled(const std::string &cmd);
   void RecordProfilingSampleIfNeed(const std::string &cmd, uint64_t duration);
   void SetImporting() { importing_ = true; }
diff --git a/src/server/server.cc b/src/server/server.cc
index bc4c29391f8..cd6b4acd48e 100644
--- a/src/server/server.cc
+++ b/src/server/server.cc
@@ -160,8 +160,7 @@ Status Server::Start() {
     }
   }
   // Create objects used for slot migration
-  slot_migrator =
-      std::make_unique<SlotMigrator>(this, config_->migrate_speed, config_->pipeline_size, config_->sequence_gap);
+  slot_migrator = std::make_unique<SlotMigrator>(this);
   auto s = slot_migrator->CreateMigrationThread();
   if (!s.IsOK()) {
     return s.Prefixed("failed to create migration thread");
@@ -680,7 +679,7 @@ void Server::OnEntryAddedToStream(const std::string &ns, const std::string &key,
 void Server::updateCachedTime() { unix_time.store(util::GetTimeStamp()); }
 
 int Server::IncrClientNum() {
-  total_clients_.fetch_add(1, std::memory_order::memory_order_relaxed);
+  total_clients_.fetch_add(1, std::memory_order_relaxed);
   return connected_clients_.fetch_add(1, std::memory_order_relaxed);
 }
@@ -1598,7 +1597,7 @@ ReplState Server::GetReplicationState() {
   return kReplConnecting;
 }
 
-Status Server::LookupAndCreateCommand(const std::string &cmd_name, std::unique_ptr<Commander> *cmd) {
+StatusOr<std::unique_ptr<Commander>> Server::LookupAndCreateCommand(const std::string &cmd_name) {
   if (cmd_name.empty()) return {Status::RedisUnknownCmd};
 
   auto commands = redis::CommandTable::Get();
@@ -1607,11 +1606,11 @@ Status Server::LookupAndCreateCommand(const std::string &cmd_name, std::unique_p
     return {Status::RedisUnknownCmd};
   }
 
-  auto redis_cmd = cmd_iter->second;
-  *cmd = redis_cmd->factory();
-  (*cmd)->SetAttributes(redis_cmd);
+  auto cmd_attr = cmd_iter->second;
+  auto cmd = cmd_attr->factory();
+  cmd->SetAttributes(cmd_attr);
 
-  return Status::OK();
+  return cmd;
 }
 
 Status Server::ScriptExists(const std::string &sha) {
diff --git a/src/server/server.h b/src/server/server.h
index a86eedf1cd8..4f8fe3142bf 100644
--- a/src/server/server.h
+++ b/src/server/server.h
@@ -177,7 +177,7 @@ class Server {
   bool IsStopped() const { return stop_; }
   bool IsLoading() const { return is_loading_; }
   Config *GetConfig() { return config_; }
-  static Status LookupAndCreateCommand(const std::string &cmd_name, std::unique_ptr<Commander> *cmd);
+  static StatusOr<std::unique_ptr<Commander>> LookupAndCreateCommand(const std::string &cmd_name);
   void AdjustOpenFilesLimit();
   void AdjustWorkerThreads();
diff --git a/src/storage/batch_extractor.cc b/src/storage/batch_extractor.cc
index 56f588746a8..f60669ebb5e 100644
--- a/src/storage/batch_extractor.cc
+++ b/src/storage/batch_extractor.cc
@@ -214,8 +214,7 @@ rocksdb::Status WriteBatchExtractor::PutCF(uint32_t column_family_id, const Slic
         return rocksdb::Status::InvalidArgument(
             fmt::format("failed to parse an offset of SETBIT: {}", parsed_offset.Msg()));
       }
-
-      bool bit_value = redis::Bitmap::GetBitFromValueAndOffset(value.ToString(), *parsed_offset);
+      bool bit_value = redis::Bitmap::GetBitFromValueAndOffset(value.ToStringView(), *parsed_offset);
       command_args = {"SETBIT", user_key, (*args)[1], bit_value ? "1" : "0"};
       break;
     }
diff --git a/src/storage/event_listener.cc b/src/storage/event_listener.cc
index 8bc204f76b8..a32f8b70300 100644
--- a/src/storage/event_listener.cc
+++ b/src/storage/event_listener.cc
@@ -75,9 +75,19 @@ bool IsDiskQuotaExceeded(const rocksdb::Status &bg_error) {
   return err_msg.find(exceeded_quota_str) != std::string::npos;
 }
 
+void EventListener::OnCompactionBegin(rocksdb::DB *db, const rocksdb::CompactionJobInfo &ci) {
+  LOG(INFO) << "[event_listener/compaction_begin] column family: " << ci.cf_name << ", job_id: " << ci.job_id
+            << ", compaction reason: " << rocksdb::GetCompactionReasonString(ci.compaction_reason)
+            << ", output compression type: " << CompressType2String(ci.compression)
+            << ", base input level(files): " << ci.base_input_level << "(" << ci.input_files.size() << ")"
+            << ", output level(files): " << ci.output_level << "(" << ci.output_files.size() << ")"
+            << ", input bytes: " << ci.stats.total_input_bytes << ", output bytes:" << ci.stats.total_output_bytes
+            << ", is_manual_compaction:" << (ci.stats.is_manual_compaction ? "yes" : "no");
+}
+
 void EventListener::OnCompactionCompleted(rocksdb::DB *db, const rocksdb::CompactionJobInfo &ci) {
   LOG(INFO) << "[event_listener/compaction_completed] column family: " << ci.cf_name << ", job_id: " << ci.job_id
-            << ", compaction reason: " << static_cast<int>(ci.compaction_reason)
+            << ", compaction reason: " << rocksdb::GetCompactionReasonString(ci.compaction_reason)
             << ", output compression type: " << CompressType2String(ci.compression)
             << ", base input level(files): " << ci.base_input_level << "(" << ci.input_files.size() << ")"
             << ", output level(files): " << ci.output_level << "(" << ci.output_files.size() << ")"
@@ -88,9 +98,25 @@ void EventListener::OnCompactionCompleted(rocksdb::DB *db, const rocksdb::Compac
   storage_->CheckDBSizeLimit();
 }
 
+void EventListener::OnSubcompactionBegin(const rocksdb::SubcompactionJobInfo &si) {
+  LOG(INFO) << "[event_listener/subcompaction_begin] column family: " << si.cf_name << ", job_id: " << si.job_id
+            << ", compaction reason: " << rocksdb::GetCompactionReasonString(si.compaction_reason)
+            << ", output compression type: " << CompressType2String(si.compression);
+}
+
+void EventListener::OnSubcompactionCompleted(const rocksdb::SubcompactionJobInfo &si) {
+  LOG(INFO) << "[event_listener/subcompaction_completed] column family: " << si.cf_name << ", job_id: " << si.job_id
+            << ", compaction reason: " << rocksdb::GetCompactionReasonString(si.compaction_reason)
+            << ", output compression type: " << CompressType2String(si.compression)
+            << ", base input level(files): " << si.base_input_level << ", output level(files): " << si.output_level
+            << ", input bytes: " << si.stats.total_input_bytes << ", output bytes:" << si.stats.total_output_bytes
+            << ", is_manual_compaction:" << (si.stats.is_manual_compaction ? "yes" : "no")
+            << ", elapsed(micro): " << si.stats.elapsed_micros;
+}
+
 void EventListener::OnFlushBegin(rocksdb::DB *db, const rocksdb::FlushJobInfo &fi) {
   LOG(INFO) << "[event_listener/flush_begin] column family: " << fi.cf_name << ", thread_id: " << fi.thread_id
-            << ", job_id: " << fi.job_id << ", reason: " << static_cast<int>(fi.flush_reason);
+            << ", job_id: " << fi.job_id << ", reason: " << rocksdb::GetFlushReasonString(fi.flush_reason);
 }
 
 void EventListener::OnFlushCompleted(rocksdb::DB *db, const rocksdb::FlushJobInfo &fi) {
diff --git a/src/storage/event_listener.h b/src/storage/event_listener.h
index 4cd1606f3d1..3e978c23841 100644
--- a/src/storage/event_listener.h
+++ b/src/storage/event_listener.h
@@ -31,7 +31,11 @@ class EventListener : public rocksdb::EventListener {
   ~EventListener() override = default;
   void OnFlushBegin(rocksdb::DB *db, const rocksdb::FlushJobInfo &fi) override;
   void OnFlushCompleted(rocksdb::DB *db, const rocksdb::FlushJobInfo &fi) override;
+  void OnCompactionBegin(rocksdb::DB *db, const rocksdb::CompactionJobInfo &ci) override;
   void OnCompactionCompleted(rocksdb::DB *db, const rocksdb::CompactionJobInfo &ci) override;
+  void OnSubcompactionBegin(const rocksdb::SubcompactionJobInfo &si) override;
+  void OnSubcompactionCompleted(const rocksdb::SubcompactionJobInfo &si) override;
+
   void OnBackgroundError(rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) override;
   void OnTableFileDeleted(const rocksdb::TableFileDeletionInfo &info) override;
   void OnStallConditionsChanged(const rocksdb::WriteStallInfo &info) override;
diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc
index 1bca93d77dc..5e872af0eda 100644
--- a/src/storage/redis_metadata.cc
+++ b/src/storage/redis_metadata.cc
@@ -471,3 +471,21 @@ rocksdb::Status JsonMetadata::Decode(Slice *input) {
 
   return rocksdb::Status::OK();
 }
+
+void SearchMetadata::Encode(std::string *dst) const {
+  Metadata::Encode(dst);
+
+  PutFixed8(dst, uint8_t(on_data_type));
+}
+
+rocksdb::Status SearchMetadata::Decode(Slice *input) {
+  if (auto s = Metadata::Decode(input); !s.ok()) {
+    return s;
+  }
+
+  if (!GetFixed8(input, reinterpret_cast<uint8_t *>(&on_data_type))) {
+    return rocksdb::Status::InvalidArgument(kErrMetadataTooShort);
+  }
+
+  return rocksdb::Status::OK();
+}
diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h
index ce1026443af..dac2d0e16bb 100644
--- a/src/storage/redis_metadata.h
+++ b/src/storage/redis_metadata.h
@@ -49,6 +49,7 @@ enum RedisType : uint8_t {
   kRedisStream = 8,
   kRedisBloomFilter = 9,
   kRedisJson = 10,
+  kRedisSearch = 11,
 };
 
 struct RedisTypes {
@@ -313,3 +314,18 @@ class JsonMetadata : public Metadata {
   void Encode(std::string *dst) const override;
   rocksdb::Status Decode(Slice *input) override;
 };
+
+enum class SearchOnDataType : uint8_t {
+  HASH = kRedisHash,
+  JSON = kRedisJson,
+};
+
+class SearchMetadata : public Metadata {
+ public:
+  SearchOnDataType on_data_type;
+
+  explicit SearchMetadata(bool generate_version = true) : Metadata(kRedisSearch, generate_version) {}
+
+  void Encode(std::string *dst) const override;
+  rocksdb::Status Decode(Slice *input) override;
+};
diff --git a/src/storage/scripting.cc b/src/storage/scripting.cc
index 8105d321464..e4c6d1fb6fa 100644
--- a/src/storage/scripting.cc
+++ b/src/storage/scripting.cc
@@ -707,36 +707,32 @@ int RedisGenericCommand(lua_State *lua, int raise_error) {
     }
   }
 
-  auto commands = redis::CommandTable::Get();
-  auto cmd_iter = commands->find(util::ToLower(args[0]));
-  if (cmd_iter == commands->end()) {
+  auto cmd_s = Server::LookupAndCreateCommand(args[0]);
+  if (!cmd_s) {
     PushError(lua, "Unknown Redis command called from Lua script");
     return raise_error ? RaiseError(lua) : 1;
   }
+  auto cmd = *std::move(cmd_s);
 
-  auto redis_cmd = cmd_iter->second;
-  if (read_only && !(redis_cmd->flags & redis::kCmdReadOnly)) {
+  auto attributes = cmd->GetAttributes();
+  auto cmd_flags = attributes->GenerateFlags(args);
+
+  if (read_only && !(cmd_flags & redis::kCmdReadOnly)) {
     PushError(lua, "Write commands are not allowed from read-only scripts");
     return raise_error ? RaiseError(lua) : 1;
   }
 
-  auto cmd = redis_cmd->factory();
-  cmd->SetAttributes(redis_cmd);
-  cmd->SetArgs(args);
-
-  int arity = cmd->GetAttributes()->arity;
-  if (((arity > 0 && argc != arity) || (arity < 0 && argc < -arity))) {
+  if (!attributes->CheckArity(argc)) {
     PushError(lua, "Wrong number of args calling Redis command From Lua script");
     return raise_error ? RaiseError(lua) : 1;
   }
 
-  auto attributes = cmd->GetAttributes();
-  auto cmd_flags = attributes->GenerateFlags(args);
+
   if (cmd_flags & redis::kCmdNoScript) {
     PushError(lua, "This Redis command is not allowed from scripts");
     return raise_error ? RaiseError(lua) : 1;
   }
 
-  std::string cmd_name = util::ToLower(args[0]);
+  std::string cmd_name = attributes->name;
 
   auto srv = GetServer(lua);
   Config *config = srv->GetConfig();
@@ -763,23 +759,15 @@ int RedisGenericCommand(lua_State *lua, int raise_error) {
     return raise_error ? RaiseError(lua) : 1;
   }
 
-  auto s = cmd->Parse(args);
+  cmd->SetArgs(args);
+  auto s = cmd->Parse();
   if (!s) {
     PushError(lua, s.Msg().data());
     return raise_error ? RaiseError(lua) : 1;
   }
 
-  srv->stats.IncrCalls(cmd_name);
-  auto start = std::chrono::high_resolution_clock::now();
-  bool is_profiling = conn->IsProfilingEnabled(cmd_name);
   std::string output;
-  s = cmd->Execute(srv, srv->GetCurrentConnection(), &output);
-  auto end = std::chrono::high_resolution_clock::now();
-  uint64_t duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
-  if (is_profiling) conn->RecordProfilingSampleIfNeed(cmd_name, duration);
-  srv->SlowlogPushEntryIfNeeded(&args, duration, conn);
-  srv->stats.IncrLatency(static_cast<int64_t>(duration), cmd_name);
-  srv->FeedMonitorConns(conn, args);
+  s = conn->ExecuteCommand(cmd_name, args, cmd.get(), &output);
   if (!s) {
     PushError(lua, s.Msg().data());
     return raise_error ? RaiseError(lua) : 1;
diff --git a/src/storage/storage.cc b/src/storage/storage.cc
index 75f6fb6de86..a74e49d3523 100644
--- a/src/storage/storage.cc
+++ b/src/storage/storage.cc
@@ -252,7 +252,7 @@ Status Storage::CreateColumnFamilies(const rocksdb::Options &options) {
       return Status::OK();
     }
 
-    return res;
+    return std::move(res);
   }
 
   return Status::OK();
@@ -434,7 +434,8 @@ Status Storage::RestoreFromBackup() {
   // We must reopen the backup engine every time, as the files is changed
   rocksdb::BackupEngineOptions bk_option(config_->backup_sync_dir);
   auto bes = util::BackupEngineOpen(db_->GetEnv(), bk_option);
-  if (!bes) return bes;
+  if (!bes) return std::move(bes);
+
   backup_ = std::move(*bes);
 
   auto s = backup_->RestoreDBFromLatestBackup(config_->db_dir, config_->db_dir);
@@ -639,10 +640,6 @@ rocksdb::Status Storage::Write(const rocksdb::WriteOptions &options, rocksdb::Wr
 }
 
 rocksdb::Status Storage::writeToDB(const rocksdb::WriteOptions &options, rocksdb::WriteBatch *updates) {
-  if (db_size_limit_reached_) {
-    return rocksdb::Status::SpaceLimit();
-  }
-
   // Put replication id logdata at the end of write batch
   if (replid_.length() == kReplIdLength) {
     updates->PutLogData(ServerLogData(kReplIdLog, replid_).Encode());
@@ -737,6 +734,8 @@ rocksdb::ColumnFamilyHandle *Storage::GetCFHandle(const std::string &name) {
   return cf_handles_[0];
 }
 
+rocksdb::ColumnFamilyHandle *Storage::GetCFHandle(ColumnFamilyID id) { return cf_handles_[static_cast<size_t>(id)]; }
+
 rocksdb::Status Storage::Compact(rocksdb::ColumnFamilyHandle *cf, const Slice *begin, const Slice *end) {
   rocksdb::CompactRangeOptions compact_opts;
   compact_opts.change_level = true;
diff --git a/src/storage/storage.h b/src/storage/storage.h
index 6114c7fc437..0e20425a68d 100644
--- a/src/storage/storage.h
+++ b/src/storage/storage.h
@@ -51,7 +51,7 @@ inline constexpr StorageEngineType STORAGE_ENGINE_TYPE = StorageEngineType::KVROCKS;
 const int kReplIdLength = 16;
 
 enum ColumnFamilyID {
-  kColumnFamilyIDDefault,
+  kColumnFamilyIDDefault = 0,
   kColumnFamilyIDMetadata,
   kColumnFamilyIDZSetScore,
   kColumnFamilyIDPubSub,
@@ -177,11 +177,14 @@ class Storage {
   bool IsClosing() const { return db_closing_; }
   std::string GetName() const { return config_->db_name; }
   rocksdb::ColumnFamilyHandle *GetCFHandle(const std::string &name);
+  rocksdb::ColumnFamilyHandle *GetCFHandle(ColumnFamilyID id);
   std::vector<rocksdb::ColumnFamilyHandle *> *GetCFHandles() { return &cf_handles_; }
   LockManager *GetLockManager() { return &lock_mgr_; }
   void PurgeOldBackups(uint32_t num_backups_to_keep, uint32_t backup_max_keep_hours);
   uint64_t GetTotalSize(const std::string &ns = kDefaultNamespace);
   void CheckDBSizeLimit();
+  bool ReachedDBSizeLimit() { return db_size_limit_reached_; }
+  void SetDBSizeLimit(bool limit) { db_size_limit_reached_ = limit; }
   void SetIORateLimit(int64_t max_io_mb);
 
   std::shared_lock<std::shared_mutex> ReadLockGuard();
@@ -254,7 +257,7 @@ class Storage {
   Config *config_ = nullptr;
   std::vector<rocksdb::ColumnFamilyHandle *> cf_handles_;
   LockManager lock_mgr_;
-  bool db_size_limit_reached_ = false;
+  std::atomic<bool> db_size_limit_reached_{false};
 
   DBStats db_stats_;
diff --git a/src/types/redis_bitmap.cc b/src/types/redis_bitmap.cc
index 31cd55ba7f6..ac3d8768f8e 100644
--- a/src/types/redis_bitmap.cc
+++ b/src/types/redis_bitmap.cc
@@ -24,19 +24,49 @@
 #include <memory>
 #include <vector>
 
+#include "common/bit_util.h"
 #include "db_util.h"
 #include "parse_util.h"
 #include "redis_bitmap_string.h"
 
 namespace redis {
 
-const uint32_t kBitmapSegmentBits = 1024 * 8;
-const uint32_t kBitmapSegmentBytes = 1024;
+constexpr uint32_t kBitmapSegmentBits = 1024 * 8;
+constexpr uint32_t kBitmapSegmentBytes = 1024;
 
-const char kErrBitmapStringOutOfRange[] =
+constexpr char kErrBitmapStringOutOfRange[] =
     "The size of the bitmap string exceeds the "
     "configuration item max-bitmap-to-string-mb";
 
+/*
+ * If you setbit bit 0 1, the value is stored as 0x01 in Kvrocks Bitmap but 0x80
+ * in Redis and Kvrocks BitmapString. This is because Kvrocks Bitmap uses LSB,
+ * but Redis and Kvrocks BitmapString use MSB.
+ *
+ * So we need to swap bits to keep the same return value as Redis.
+ * This swap table is generated according to the following mapping definition.
+ * kBitSwapTable(x) = ((x & 0x80) >> 7)| ((x & 0x40) >> 5)|\
+ *                    ((x & 0x20) >> 3)| ((x & 0x10) >> 1)|\
+ *                    ((x & 0x08) << 1)| ((x & 0x04) << 3)|\
+ *                    ((x & 0x02) << 5)| ((x & 0x01) << 7);
+ */
+static const uint8_t kBitSwapTable[256] = {
+    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48,
+    0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4,
+    0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C,
+    0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2,
+    0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A,
+    0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E,
+    0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21,
+    0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
+    0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55,
+    0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD,
+    0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B,
+    0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7,
+    0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F,
+    0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF};
+
 // Resize the segment to makes its new length at least min_bytes, new bytes will be set to 0.
 // min_bytes can not more than kBitmapSegmentBytes
 void ExpandBitmapSegment(std::string *segment, size_t min_bytes) {
@@ -56,6 +86,13 @@ void ExpandBitmapSegment(std::string *segment, size_t min_bytes) {
   }
 }
 
+// Constructing sub-key index, see:
+// https://kvrocks.apache.org/community/data-structure-on-rocksdb#bitmap-sub-keys-values
+// The value is also equal to the offset of the bytes in the bitmap.
+uint32_t SegmentSubKeyIndexForBit(uint32_t bit_offset) {
+  return (bit_offset / kBitmapSegmentBits) * kBitmapSegmentBytes;
+}
+
 rocksdb::Status Bitmap::GetMetadata(const Slice &ns_key, BitmapMetadata *metadata, std::string *raw_value) {
   auto s = GetRawMetadata(ns_key, raw_value);
   if (!s.ok()) return s;
@@ -64,7 +101,7 @@ rocksdb::Status Bitmap::GetMetadata(const Slice &ns_key, BitmapMetadata *metadat
   return ParseMetadata({kRedisBitmap, kRedisString}, &slice, metadata);
 }
 
-rocksdb::Status Bitmap::GetBit(const Slice &user_key, uint32_t offset, bool *bit) {
+rocksdb::Status Bitmap::GetBit(const Slice &user_key, uint32_t bit_offset, bool *bit) {
   *bit = false;
   std::string raw_value;
   std::string ns_key = AppendNamespacePrefix(user_key);
@@ -75,20 +112,23 @@ rocksdb::Status Bitmap::GetBit(const Slice &user_key, uint32_t offset, bool *bit
 
   if (metadata.Type() == kRedisString) {
     redis::BitmapString bitmap_string_db(storage_, namespace_);
-    return bitmap_string_db.GetBit(raw_value, offset, bit);
+    return bitmap_string_db.GetBit(raw_value, bit_offset, bit);
   }
 
   LatestSnapShot ss(storage_);
   rocksdb::ReadOptions read_options;
   read_options.snapshot = ss.GetSnapShot();
-  uint32_t index = (offset / kBitmapSegmentBits) * kBitmapSegmentBytes;
-  std::string value;
-  std::string sub_key =
-      InternalKey(ns_key, std::to_string(index), metadata.version, storage_->IsSlotIdEncoded()).Encode();
+  rocksdb::PinnableSlice value;
+  std::string sub_key = InternalKey(ns_key, std::to_string(SegmentSubKeyIndexForBit(bit_offset)), metadata.version,
+                                    storage_->IsSlotIdEncoded())
+                            .Encode();
   s = storage_->Get(read_options, sub_key, &value);
+  // If s.IsNotFound(), it means all bits in this segment are 0,
+  // so we can return with *bit == false directly.
   if (!s.ok()) return s.IsNotFound() ? rocksdb::Status::OK() : s;
-  uint32_t byte_index = (offset / 8) % kBitmapSegmentBytes;
-  if ((byte_index < value.size() && (value[byte_index] & (1 << (offset % 8))))) {
+  uint32_t bit_offset_in_segment = bit_offset % kBitmapSegmentBits;
+  if (bit_offset_in_segment / 8 < value.size() &&
+      util::lsb::GetBit(reinterpret_cast<const uint8_t *>(value.data()), bit_offset_in_segment)) {
     *bit = true;
   }
   return rocksdb::Status::OK();
@@ -129,41 +169,16 @@ rocksdb::Status Bitmap::GetString(const Slice &user_key, const uint32_t max_btos
     uint32_t valid_size = std::min(
         {fragment.size(), static_cast<size_t>(kBitmapSegmentBytes), static_cast<size_t>(metadata.size - frag_index)});
-    /*
-     * If you setbit bit 0 1, the value is stored as 0x01 in Kvrocks but 0x80 in Redis.
-     * So we need to swap bits is to keep the same return value as Redis.
-     * This swap table is generated according to the following mapping definition.
-     * swap_table(x) = ((x & 0x80) >> 7)| ((x & 0x40) >> 5)|\
-     *                 ((x & 0x20) >> 3)| ((x & 0x10) >> 1)|\
-     *                 ((x & 0x08) << 1)| ((x & 0x04) << 3)|\
-     *                 ((x & 0x02) << 5)| ((x & 0x01) << 7);
-     */
-    static const uint8_t swap_table[256] = {
-        0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88,
-        0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4,
-        0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC,
-        0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
-        0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A,
-        0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6,
-        0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE,
-        0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
-        0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85,
-        0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD,
-        0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3,
-        0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
-        0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97,
-        0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
-        0x3F, 0xBF, 0x7F, 0xFF};
     for (uint32_t i = 0; i < valid_size; i++) {
       if (!fragment[i]) continue;
-      fragment[i] = static_cast<char>(swap_table[static_cast<uint8_t>(fragment[i])]);
+      fragment[i] = static_cast<char>(kBitSwapTable[static_cast<uint8_t>(fragment[i])]);
     }
     value->replace(frag_index, valid_size, fragment.data(), valid_size);
   }
   return rocksdb::Status::OK();
 }
 
-rocksdb::Status Bitmap::SetBit(const Slice &user_key, uint32_t offset, bool new_bit, bool *old_bit) {
+rocksdb::Status Bitmap::SetBit(const Slice &user_key, uint32_t bit_offset, bool new_bit, bool *old_bit) {
   std::string raw_value;
   std::string ns_key = AppendNamespacePrefix(user_key);
 
@@ -174,30 +189,28 @@ rocksdb::Status Bitmap::SetBit(const Slice &user_key, uint32_t offset, bool new_
 
   if (metadata.Type() == kRedisString) {
     redis::BitmapString bitmap_string_db(storage_, namespace_);
-    return bitmap_string_db.SetBit(ns_key, &raw_value, offset, new_bit, old_bit);
+    return bitmap_string_db.SetBit(ns_key, &raw_value, bit_offset, new_bit, old_bit);
   }
 
   std::string value;
-  uint32_t index = (offset / kBitmapSegmentBits) * kBitmapSegmentBytes;
+  uint32_t segment_index = SegmentSubKeyIndexForBit(bit_offset);
   std::string sub_key =
-      InternalKey(ns_key, std::to_string(index), metadata.version, storage_->IsSlotIdEncoded()).Encode();
+      InternalKey(ns_key, std::to_string(segment_index), metadata.version, storage_->IsSlotIdEncoded()).Encode();
   if (s.ok()) {
     s = storage_->Get(rocksdb::ReadOptions(), sub_key, &value);
     if (!s.ok() && !s.IsNotFound()) return s;
   }
-  uint32_t byte_index = (offset / 8) % kBitmapSegmentBytes;
-  uint64_t used_size = index + byte_index + 1;
+  uint32_t bit_offset_in_segment = bit_offset % kBitmapSegmentBits;
+  uint32_t byte_index = (bit_offset / 8) % kBitmapSegmentBytes;
+  uint64_t used_size = segment_index + byte_index + 1;
   uint64_t bitmap_size = std::max(used_size, metadata.size);
+  // NOTE: value.size() might be greater than metadata.size.
   ExpandBitmapSegment(&value, byte_index + 1);
-  uint32_t bit_offset = offset % 8;
-  *old_bit = (value[byte_index] & (1 << bit_offset)) != 0;
-  if (new_bit) {
-    value[byte_index] = static_cast<char>(value[byte_index] | (1 << bit_offset));
-  } else {
-    value[byte_index] = static_cast<char>(value[byte_index] & (~(1 << bit_offset)));
-  }
+  auto *data_ptr = reinterpret_cast<uint8_t *>(value.data());
+  *old_bit = util::lsb::GetBit(data_ptr, bit_offset_in_segment);
+  util::lsb::SetBitTo(data_ptr, bit_offset_in_segment, new_bit);
 
   auto batch = storage_->GetWriteBatchBase();
-  WriteBatchLogData log_data(kRedisBitmap, {std::to_string(kRedisCmdSetBit), std::to_string(offset)});
+  WriteBatchLogData log_data(kRedisBitmap, {std::to_string(kRedisCmdSetBit), std::to_string(bit_offset)});
   batch->PutLogData(log_data.Encode());
   batch->Put(sub_key, value);
   if (metadata.size != bitmap_size) {
@@ -209,7 +222,7 @@ rocksdb::Status Bitmap::SetBit(const Slice &user_key, uint32_t offset, bool new_
   return storage_->Write(storage_->DefaultWriteOptions(), batch->GetWriteBatch());
 }
 
-rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t stop, uint32_t *cnt) {
+rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t stop, bool is_bit_index, uint32_t *cnt) {
   *cnt = 0;
   std::string raw_value;
   std::string ns_key = AppendNamespacePrefix(user_key);
@@ -218,18 +231,31 @@ rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t s
   rocksdb::Status s = GetMetadata(ns_key, &metadata, &raw_value);
   if (!s.ok()) return s.IsNotFound() ? rocksdb::Status::OK() : s;
 
+  /* Convert negative indexes */
+  if (start < 0 && stop < 0 && start > stop) {
+    return rocksdb::Status::OK();
+  }
+
   if (metadata.Type() == kRedisString) {
     redis::BitmapString bitmap_string_db(storage_, namespace_);
-    return bitmap_string_db.BitCount(raw_value, start, stop, cnt);
+    return bitmap_string_db.BitCount(raw_value, start, stop, is_bit_index, cnt);
   }
 
-  if (start < 0) start += static_cast<int64_t>(metadata.size) + 1;
-  if (stop < 0) stop += static_cast<int64_t>(metadata.size) + 1;
-  if (stop > static_cast<int64_t>(metadata.size)) stop = static_cast<int64_t>(metadata.size);
-  if (start < 0 || stop <= 0 || start >= stop) return rocksdb::Status::OK();
+  auto totlen = static_cast<int64_t>(metadata.size);
+  if (is_bit_index) totlen <<= 3;
+  // Counting bits in byte [start, stop].
+  std::tie(start, stop) = BitmapString::NormalizeRange(start, stop, totlen);
+  // Always return 0 if start is greater than stop after normalization.
+  if (start > stop) return rocksdb::Status::OK();
 
-  auto u_start = static_cast<uint32_t>(start);
-  auto u_stop = static_cast<uint32_t>(stop);
+  int64_t start_byte = start;
+  int64_t stop_byte = stop;
+  uint8_t first_byte_neg_mask = 0, last_byte_neg_mask = 0;
+  std::tie(start_byte, stop_byte) = BitmapString::NormalizeToByteRangeWithPaddingMask(
+      is_bit_index, start, stop, &first_byte_neg_mask, &last_byte_neg_mask);
+
+  auto u_start = static_cast<uint32_t>(start_byte);
+  auto u_stop = static_cast<uint32_t>(stop_byte);
 
   LatestSnapShot ss(storage_);
   rocksdb::ReadOptions read_options;
@@ -237,6 +263,7 @@ rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t s
   uint32_t start_index = u_start / kBitmapSegmentBytes;
   uint32_t stop_index = u_stop / kBitmapSegmentBytes;
   // Don't use multi get to prevent large range query, and take too much memory
+  uint32_t mask_cnt = 0;
   for (uint32_t i = start_index; i <= stop_index; i++) {
     rocksdb::PinnableSlice pin_value;
     std::string sub_key =
@@ -244,13 +271,34 @@ rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t s
         .Encode();
     s = storage_->Get(read_options, sub_key, &pin_value);
     if (!s.ok() && !s.IsNotFound()) return s;
+    // NotFound means all bits in this segment are 0.
     if (s.IsNotFound()) continue;
-    size_t j = 0;
-    if (i == start_index) j = u_start % kBitmapSegmentBytes;
-    auto k = static_cast<int64_t>(pin_value.size());
-    if (i == stop_index) k = u_stop % kBitmapSegmentBytes + 1;
-    *cnt += BitmapString::RawPopcount(reinterpret_cast<const uint8_t *>(pin_value.data()) + j, k);
+    // Counting bits in [start_in_segment, stop_in_segment]
+    int64_t start_in_segment = 0;                                                // start position in the 1024-byte segment
+    auto readable_stop_in_segment = static_cast<int64_t>(pin_value.size() - 1);  // stop position in the 1024-byte segment
+    auto stop_in_segment = readable_stop_in_segment;
+    if (i == start_index) {
+      start_in_segment = u_start % kBitmapSegmentBytes;
+      if (is_bit_index && start_in_segment <= readable_stop_in_segment && first_byte_neg_mask != 0) {
+        uint8_t first_mask_byte =
+            kBitSwapTable[static_cast<uint8_t>(pin_value[start_in_segment])] & first_byte_neg_mask;
+        mask_cnt += util::RawPopcount(&first_mask_byte, 1);
+      }
+    }
+    if (i == stop_index) {
+      stop_in_segment = u_stop % kBitmapSegmentBytes;
+      if (is_bit_index && stop_in_segment <= readable_stop_in_segment && last_byte_neg_mask != 0) {
+        uint8_t last_mask_byte = kBitSwapTable[static_cast<uint8_t>(pin_value[stop_in_segment])] & last_byte_neg_mask;
+        mask_cnt += util::RawPopcount(&last_mask_byte, 1);
+      }
+    }
+    if (stop_in_segment >= start_in_segment && readable_stop_in_segment >= start_in_segment) {
+      int64_t bytes = 0;
+      bytes = std::min(stop_in_segment, readable_stop_in_segment) - start_in_segment + 1;
+      *cnt += util::RawPopcount(reinterpret_cast<const uint8_t *>(pin_value.data()) + start_in_segment, bytes);
+    }
   }
+  *cnt -= mask_cnt;
   return rocksdb::Status::OK();
 }
@@ -271,15 +319,13 @@ rocksdb::Status Bitmap::BitPos(const Slice &user_key, bool bit, int64_t start, i
     redis::BitmapString bitmap_string_db(storage_, namespace_);
     return bitmap_string_db.BitPos(raw_value, bit, start, stop, stop_given, pos);
   }
-
-  if (start < 0) start += static_cast<int64_t>(metadata.size) + 1;
-  if (stop < 0) stop += static_cast<int64_t>(metadata.size) + 1;
-  if (start < 0 || stop < 0 || start > stop) {
+  std::tie(start, stop) = BitmapString::NormalizeRange(start, stop, static_cast<int64_t>(metadata.size));
+  auto u_start = static_cast<uint32_t>(start);
+  auto u_stop = static_cast<uint32_t>(stop);
+  if (u_start > u_stop) {
     *pos = -1;
     return rocksdb::Status::OK();
   }
-  auto u_start = static_cast<uint32_t>(start);
-  auto u_stop = static_cast<uint32_t>(stop);
 
   auto bit_pos_in_byte = [](char byte, bool bit) -> int {
     for (int i = 0; i < 8; i++) {
@@ -295,8 +341,9 @@ rocksdb::Status Bitmap::BitPos(const Slice &user_key, bool bit, int64_t start, i
   uint32_t start_index = u_start / kBitmapSegmentBytes;
   uint32_t stop_index = u_stop / kBitmapSegmentBytes;
   // Don't use multi get to prevent large range query, and take too much memory
-  rocksdb::PinnableSlice pin_value;
+  // Searching bits in segments [start_index, stop_index].
   for (uint32_t i = start_index; i <= stop_index; i++) {
+    rocksdb::PinnableSlice pin_value;
     std::string sub_key =
         InternalKey(ns_key, std::to_string(i * kBitmapSegmentBytes), metadata.version, storage_->IsSlotIdEncoded())
             .Encode();
@@ -304,27 +351,56 @@ rocksdb::Status Bitmap::BitPos(const Slice &user_key, bool bit, int64_t start, i
     if (!s.ok() && !s.IsNotFound()) return s;
     if (s.IsNotFound()) {
       if (!bit) {
+        // Note: even if stop is given, we can return immediately when bit is 0,
+        // because bit_pos will always be greater.
         *pos = i * kBitmapSegmentBits;
         return rocksdb::Status::OK();
       }
       continue;
     }
-    size_t j = 0;
-    if (i == start_index) j = u_start % kBitmapSegmentBytes;
-    for (; j < pin_value.size(); j++) {
-      if (i == stop_index && j > (u_stop % kBitmapSegmentBytes)) break;
-      if (bit_pos_in_byte(pin_value[j], bit) != -1) {
-        *pos = static_cast<int64_t>(i * kBitmapSegmentBits + j * 8 + bit_pos_in_byte(pin_value[j], bit));
+    size_t byte_pos_in_segment = 0;
+    if (i == start_index) byte_pos_in_segment = u_start % kBitmapSegmentBytes;
+    size_t stop_byte_in_segment = pin_value.size();
+    if (i == stop_index) {
+      DCHECK_LE(u_stop % kBitmapSegmentBytes + 1, pin_value.size());
+      stop_byte_in_segment = u_stop % kBitmapSegmentBytes + 1;
+    }
+    // Invariant:
+    // 1. pin_value.size() <= kBitmapSegmentBytes.
+    // 2. If it's the last segment, metadata.size % kBitmapSegmentBytes <= pin_value.size().
+    for (; byte_pos_in_segment < stop_byte_in_segment; byte_pos_in_segment++) {
+      int bit_pos_in_byte_value = bit_pos_in_byte(pin_value[byte_pos_in_segment], bit);
+      if (bit_pos_in_byte_value != -1) {
+        *pos = static_cast<int64_t>(i * kBitmapSegmentBits + byte_pos_in_segment * 8 + bit_pos_in_byte_value);
         return rocksdb::Status::OK();
       }
     }
-    if (!bit && pin_value.size() < kBitmapSegmentBytes) {
+    if (bit) {
+      continue;
+    }
+    // There're two cases where `pin_value.size() < kBitmapSegmentBytes`:
+    // 1. If it's the last segment, we've done searching in the above loop.
+    // 2. If it's not the last segment, we can check if the segment is all 0.
+    if (pin_value.size() < kBitmapSegmentBytes) {
+      if (i == stop_index) {
+        continue;
+      }
       *pos = static_cast<int64_t>(i * kBitmapSegmentBits + pin_value.size() * 8);
       return rocksdb::Status::OK();
     }
-    pin_value.Reset();
   }
   // bit was not found
+
+  /* If we are looking for clear bits, and the user specified an exact
+   * range with start-end, we can't consider the right of the range as
+   * zero padded (as we do when no explicit end is given).
+   *
+   * So if redisBitpos() returns the first bit outside the range,
+   * we return -1 to the caller, to mean, in the specified range there
+   * is not a single "0" bit. */
+  if (stop_given && bit == 0) {
+    *pos = -1;
+    return rocksdb::Status::OK();
+  }
   *pos = bit ? -1 : static_cast<int64_t>(metadata.size * 8);
-1 : static_cast(metadata.size * 8); return rocksdb::Status::OK(); } @@ -336,7 +412,7 @@ rocksdb::Status Bitmap::BitOp(BitOpFlags op_flag, const std::string &op_name, co LockGuard guard(storage_->GetLockManager(), ns_key); std::vector> meta_pairs; - uint64_t max_size = 0, num_keys = op_keys.size(); + uint64_t max_bitmap_size = 0; for (const auto &op_key : op_keys) { BitmapMetadata metadata(false); @@ -344,20 +420,22 @@ rocksdb::Status Bitmap::BitOp(BitOpFlags op_flag, const std::string &op_name, co auto s = GetMetadata(ns_op_key, &metadata, &raw_value); if (!s.ok()) { if (s.IsNotFound()) { - num_keys--; continue; } return s; } if (metadata.Type() == kRedisString) { - return rocksdb::Status::InvalidArgument(kErrMsgWrongType); + // Currently, we don't support bitop between bitmap and bitmap string. + return rocksdb::Status::NotSupported(kErrMsgWrongType); } - if (metadata.size > max_size) max_size = metadata.size; + if (metadata.size > max_bitmap_size) max_bitmap_size = metadata.size; meta_pairs.emplace_back(std::move(ns_op_key), metadata); } + size_t num_keys = meta_pairs.size(); auto batch = storage_->GetWriteBatchBase(); - if (max_size == 0) { + if (max_bitmap_size == 0) { + /* Compute the bit operation, if all bitmap is empty. cleanup the dest bitmap. */ batch->Delete(metadata_cf_handle_, ns_key); return storage_->Write(storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } @@ -369,30 +447,32 @@ rocksdb::Status Bitmap::BitOp(BitOpFlags op_flag, const std::string &op_name, co batch->PutLogData(log_data.Encode()); BitmapMetadata res_metadata; - if (num_keys == op_keys.size() || op_flag != kBitOpAnd) { - uint64_t frag_numkeys = num_keys; - uint64_t stop_index = (max_size - 1) / kBitmapSegmentBytes; + // If the operation is AND and the number of keys is less than the number of op_keys, + // we can skip setting the subkeys of the result bitmap and just set the metadata. + const bool can_skip_op = op_flag == kBitOpAnd && num_keys != op_keys.size(); + if (!can_skip_op) { + uint64_t stop_index = (max_bitmap_size - 1) / kBitmapSegmentBytes; std::unique_ptr frag_res(new unsigned char[kBitmapSegmentBytes]); - uint16_t frag_maxlen = 0, frag_minlen = 0; - std::string fragment; - unsigned char output = 0, byte = 0; - std::vector fragments; LatestSnapShot ss(storage_); rocksdb::ReadOptions read_options; read_options.snapshot = ss.GetSnapShot(); for (uint64_t frag_index = 0; frag_index <= stop_index; frag_index++) { + std::vector fragments; + uint16_t frag_maxlen = 0, frag_minlen = 0; for (const auto &meta_pair : meta_pairs) { std::string sub_key = InternalKey(meta_pair.first, std::to_string(frag_index * kBitmapSegmentBytes), meta_pair.second.version, storage_->IsSlotIdEncoded()) .Encode(); + rocksdb::PinnableSlice fragment; auto s = storage_->Get(read_options, sub_key, &fragment); if (!s.ok() && !s.IsNotFound()) { return s; } if (s.IsNotFound()) { - frag_numkeys--; if (op_flag == kBitOpAnd) { + // If any of the input bitmaps is empty, the result of AND + // is empty. 
frag_maxlen = 0; break; } @@ -403,6 +483,7 @@ rocksdb::Status Bitmap::BitOp(BitOpFlags op_flag, const std::string &op_name, co } } + size_t frag_numkeys = fragments.size(); if (frag_maxlen != 0 || op_flag == kBitOpNot) { uint16_t j = 0; if (op_flag == kBitOpNot) { @@ -411,6 +492,11 @@ rocksdb::Status Bitmap::BitOp(BitOpFlags op_flag, const std::string &op_name, co memset(frag_res.get(), 0, frag_maxlen); } + /* Fast path: as far as we have data for all the input bitmaps we + * can take a fast path that performs much better than the + * vanilla algorithm. On ARM we skip the fast path since it will + * result in GCC compiling the code using multiple-words load/store + * operations that are not supported even in ARM >= v6. */ #ifndef USE_ALIGNED_ACCESS if (frag_minlen >= sizeof(uint64_t) * 4 && frag_numkeys <= 16) { auto *lres = reinterpret_cast(frag_res.get()); @@ -419,46 +505,30 @@ rocksdb::Status Bitmap::BitOp(BitOpFlags op_flag, const std::string &op_name, co lp[i] = reinterpret_cast(fragments[i].data()); } memcpy(frag_res.get(), fragments[0].data(), frag_minlen); - - if (op_flag == kBitOpAnd) { + auto apply_fast_path_op = [&](auto op) { + // Note: kBitOpNot cannot use this op, it only applying + // to kBitOpAnd, kBitOpOr, kBitOpXor. + DCHECK(op_flag != kBitOpNot); while (frag_minlen >= sizeof(uint64_t) * 4) { for (uint64_t i = 1; i < frag_numkeys; i++) { - lres[0] &= lp[i][0]; - lres[1] &= lp[i][1]; - lres[2] &= lp[i][2]; - lres[3] &= lp[i][3]; + op(lres[0], lp[i][0]); + op(lres[1], lp[i][1]); + op(lres[2], lp[i][2]); + op(lres[3], lp[i][3]); lp[i] += 4; } lres += 4; j += sizeof(uint64_t) * 4; frag_minlen -= sizeof(uint64_t) * 4; } + }; + + if (op_flag == kBitOpAnd) { + apply_fast_path_op([](uint64_t &a, uint64_t b) { a &= b; }); } else if (op_flag == kBitOpOr) { - while (frag_minlen >= sizeof(uint64_t) * 4) { - for (uint64_t i = 1; i < frag_numkeys; i++) { - lres[0] |= lp[i][0]; - lres[1] |= lp[i][1]; - lres[2] |= lp[i][2]; - lres[3] |= lp[i][3]; - lp[i] += 4; - } - lres += 4; - j += sizeof(uint64_t) * 4; - frag_minlen -= sizeof(uint64_t) * 4; - } + apply_fast_path_op([](uint64_t &a, uint64_t b) { a |= b; }); } else if (op_flag == kBitOpXor) { - while (frag_minlen >= sizeof(uint64_t) * 4) { - for (uint64_t i = 1; i < frag_numkeys; i++) { - lres[0] ^= lp[i][0]; - lres[1] ^= lp[i][1]; - lres[2] ^= lp[i][2]; - lres[3] ^= lp[i][3]; - lp[i] += 4; - } - lres += 4; - j += sizeof(uint64_t) * 4; - frag_minlen -= sizeof(uint64_t) * 4; - } + apply_fast_path_op([](uint64_t &a, uint64_t b) { a ^= b; }); } else if (op_flag == kBitOpNot) { while (frag_minlen >= sizeof(uint64_t) * 4) { lres[0] = ~lres[0]; @@ -473,6 +543,7 @@ rocksdb::Status Bitmap::BitOp(BitOpFlags op_flag, const std::string &op_name, co } #endif + uint8_t output = 0, byte = 0; for (; j < frag_maxlen; j++) { output = (fragments[0].size() <= j) ? 0 : fragments[0][j]; if (op_flag == kBitOpNot) output = ~output; @@ -497,13 +568,15 @@ rocksdb::Status Bitmap::BitOp(BitOpFlags op_flag, const std::string &op_name, co if (op_flag == kBitOpNot) { if (frag_index == stop_index) { - if (max_size == (frag_index + 1) * kBitmapSegmentBytes) { - // If the last fragment is full, `max_size % kBitmapSegmentBytes` + // We should not set the extra bytes to 0xff. So we should limit + // `frag_maxlen` for the last segment. + if (max_bitmap_size == (frag_index + 1) * kBitmapSegmentBytes) { + // If the last fragment is full, `max_bitmap_size % kBitmapSegmentBytes` // would be 0. 
In this case, we should set `frag_maxlen` to // `kBitmapSegmentBytes` to avoid writing an empty fragment. frag_maxlen = kBitmapSegmentBytes; } else { - frag_maxlen = max_size % kBitmapSegmentBytes; + frag_maxlen = max_bitmap_size % kBitmapSegmentBytes; } } else { frag_maxlen = kBitmapSegmentBytes;
@@ -514,19 +587,14 @@ rocksdb::Status Bitmap::BitOp(BitOpFlags op_flag, const std::string &op_name, co .Encode(); batch->Put(sub_key, Slice(reinterpret_cast<char *>(frag_res.get()), frag_maxlen)); } - - frag_maxlen = 0; - frag_minlen = 0; - frag_numkeys = num_keys; - fragments.clear(); } } std::string bytes; - res_metadata.size = max_size; + res_metadata.size = max_bitmap_size; res_metadata.Encode(&bytes); batch->Put(metadata_cf_handle_, ns_key, bytes); - *len = static_cast<int64_t>(max_size); + *len = static_cast<int64_t>(max_bitmap_size); return storage_->Write(storage_->DefaultWriteOptions(), batch->GetWriteBatch()); }
@@ -845,10 +913,11 @@ bool Bitmap::bitfieldWriteAheadLog(const ObserverOrUniquePtr(value.data()), bit_offset % kBitmapSegmentBits)) { bit = true; } return bit;
diff --git a/src/types/redis_bitmap.h b/src/types/redis_bitmap.h index a0ada45bfe0..9466593deb6 100644 --- a/src/types/redis_bitmap.h +++ b/src/types/redis_bitmap.h
@@ -41,15 +41,18 @@ enum BitOpFlags { namespace redis { +// We use least-significant-bit (LSB) first numbering, one flavor of bit-endianness: +// within a group of 8 bits, offsets are read right-to-left. +// This differs from applying bit commands to the string type, which uses MSB-first numbering. class Bitmap : public Database { public: class SegmentCacheStore; Bitmap(engine::Storage *storage, const std::string &ns) : Database(storage, ns) {} - rocksdb::Status GetBit(const Slice &user_key, uint32_t offset, bool *bit); + rocksdb::Status GetBit(const Slice &user_key, uint32_t bit_offset, bool *bit); rocksdb::Status GetString(const Slice &user_key, uint32_t max_btos_size, std::string *value); - rocksdb::Status SetBit(const Slice &user_key, uint32_t offset, bool new_bit, bool *old_bit); - rocksdb::Status BitCount(const Slice &user_key, int64_t start, int64_t stop, uint32_t *cnt); + rocksdb::Status SetBit(const Slice &user_key, uint32_t bit_offset, bool new_bit, bool *old_bit); + rocksdb::Status BitCount(const Slice &user_key, int64_t start, int64_t stop, bool is_bit_index, uint32_t *cnt); rocksdb::Status BitPos(const Slice &user_key, bool bit, int64_t start, int64_t stop, bool stop_given, int64_t *pos); rocksdb::Status BitOp(BitOpFlags op_flag, const std::string &op_name, const Slice &user_key, const std::vector<Slice> &op_keys, int64_t *len);
@@ -63,7 +66,7 @@ class Bitmap : public Database { std::vector<std::optional<BitfieldValue>> *rets) { return bitfield(user_key, ops, rets); } - static bool GetBitFromValueAndOffset(const std::string &value, uint32_t offset); + static bool GetBitFromValueAndOffset(std::string_view value, uint32_t bit_offset); static bool IsEmptySegment(const Slice &segment); private:
diff --git a/src/types/redis_bitmap_string.cc b/src/types/redis_bitmap_string.cc index 9b17963010a..b226d9c2f7a 100644 --- a/src/types/redis_bitmap_string.cc +++ b/src/types/redis_bitmap_string.cc
@@ -24,6 +24,7 @@ #include +#include "common/bit_util.h" #include "redis_string.h" #include "server/redis_reply.h" #include "storage/redis_metadata.h"
@@ -31,33 +32,28 @@ namespace redis { -rocksdb::Status BitmapString::GetBit(const std::string &raw_value, uint32_t offset, bool *bit) { - auto string_value = raw_value.substr(Metadata::GetOffsetAfterExpire(raw_value[0])); - uint32_t byte_index = offset >> 3; - uint32_t bit_val = 0; - uint32_t
bit_offset = 7 - (offset & 0x7); +rocksdb::Status BitmapString::GetBit(const std::string &raw_value, uint32_t bit_offset, bool *bit) { + std::string_view string_value = std::string_view{raw_value}.substr(Metadata::GetOffsetAfterExpire(raw_value[0])); + uint32_t byte_index = bit_offset >> 3; if (byte_index < string_value.size()) { - bit_val = string_value[byte_index] & (1 << bit_offset); + *bit = util::msb::GetBit(reinterpret_cast<const uint8_t *>(string_value.data()), bit_offset); + } else { + *bit = false; } - *bit = bit_val != 0; return rocksdb::Status::OK(); } -rocksdb::Status BitmapString::SetBit(const Slice &ns_key, std::string *raw_value, uint32_t offset, bool new_bit, +rocksdb::Status BitmapString::SetBit(const Slice &ns_key, std::string *raw_value, uint32_t bit_offset, bool new_bit, bool *old_bit) { size_t header_offset = Metadata::GetOffsetAfterExpire((*raw_value)[0]); auto string_value = raw_value->substr(header_offset); - uint32_t byte_index = offset >> 3; + uint32_t byte_index = bit_offset >> 3; if (byte_index >= string_value.size()) { // expand the bitmap string_value.append(byte_index - string_value.size() + 1, 0); } - uint32_t bit_offset = 7 - (offset & 0x7); - auto byteval = string_value[byte_index]; - *old_bit = (byteval & (1 << bit_offset)) != 0; - - byteval = static_cast<char>(byteval & (~(1 << bit_offset))); - byteval = static_cast<char>(byteval | ((new_bit & 0x1) << bit_offset)); - string_value[byte_index] = byteval; + auto *data_ptr = reinterpret_cast<uint8_t *>(string_value.data()); + *old_bit = util::msb::GetBit(data_ptr, bit_offset); + util::msb::SetBitTo(data_ptr, bit_offset, new_bit); *raw_value = raw_value->substr(0, header_offset); raw_value->append(string_value);
@@ -68,45 +64,53 @@ rocksdb::Status BitmapString::SetBit(const Slice &ns_key, std::string *raw_value return storage_->Write(storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } -rocksdb::Status BitmapString::BitCount(const std::string &raw_value, int64_t start, int64_t stop, uint32_t *cnt) { +rocksdb::Status BitmapString::BitCount(const std::string &raw_value, int64_t start, int64_t stop, bool is_bit_index, + uint32_t *cnt) { *cnt = 0; - auto string_value = raw_value.substr(Metadata::GetOffsetAfterExpire(raw_value[0])); - /* Convert negative indexes */ - if (start < 0 && stop < 0 && start > stop) { - return rocksdb::Status::OK(); - } + std::string_view string_value = std::string_view{raw_value}.substr(Metadata::GetOffsetAfterExpire(raw_value[0])); auto strlen = static_cast<int64_t>(string_value.size()); - if (start < 0) start = strlen + start; - if (stop < 0) stop = strlen + stop; - if (start < 0) start = 0; - if (stop < 0) stop = 0; - if (stop >= strlen) stop = strlen - 1; + int64_t totlen = strlen; + if (is_bit_index) totlen <<= 3; + std::tie(start, stop) = NormalizeRange(start, stop, totlen); + // Always return 0 if start is greater than stop after normalization. + if (start > stop) return rocksdb::Status::OK(); + + /* By default, start and stop are byte indexes into the bitmap. + * When is_bit_index is true, they are bit indexes instead, + * so the bit range has to be normalized to a byte range. */ + int64_t start_byte = start; + int64_t stop_byte = stop; + uint8_t first_byte_neg_mask = 0, last_byte_neg_mask = 0; + std::tie(start_byte, stop_byte) = + NormalizeToByteRangeWithPaddingMask(is_bit_index, start, stop, &first_byte_neg_mask, &last_byte_neg_mask); /* Precondition: end >= 0 && end < strlen, so the only condition where * zero can be returned is: start > stop. */
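To make the two padding masks concrete, here is a standalone worked example. It uses C++20 std::popcount in place of util::RawPopcount, with the mask expressions copied from NormalizeToByteRangeWithPaddingMask, and counts the set bits in the bit range [5, 17]:

```cpp
#include <bit>  // std::popcount, C++20
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t data[3] = {0xff, 0xff, 0xff};  // 24 set bits in total
  const int64_t start = 5, stop = 17;          // bit indexes, MSB-first
  // Same mask math as NormalizeToByteRangeWithPaddingMask:
  const uint8_t first_mask = ~((1 << (8 - (start & 7))) - 1) & 0xFF;  // 0xF8: bits 0..4
  const uint8_t last_mask = (1 << (7 - (stop & 7))) - 1;              // 0x3F: bits 18..23
  const int64_t start_byte = start >> 3, stop_byte = stop >> 3;       // bytes 0..2
  int cnt = 0;
  for (int64_t i = start_byte; i <= stop_byte; i++) cnt += std::popcount(data[i]);
  // Subtract the bits that fall inside the first/last byte but outside [start, stop].
  cnt -= std::popcount(static_cast<uint8_t>(data[start_byte] & first_mask));
  cnt -= std::popcount(static_cast<uint8_t>(data[stop_byte] & last_mask));
  assert(cnt == 13);  // bits 5..17 inclusive
  return 0;
}
```

Subtracting the masked popcounts trims the first and last bytes down to exactly the requested bit range, which is what the masked `firstlast` subtraction below does.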
- if (start <= stop) { - int64_t bytes = stop - start + 1; - *cnt = RawPopcount(reinterpret_cast<const uint8_t *>(string_value.data()) + start, bytes); + int64_t bytes = stop_byte - start_byte + 1; + *cnt = util::RawPopcount(reinterpret_cast<const uint8_t *>(string_value.data()) + start_byte, bytes); + if (first_byte_neg_mask != 0 || last_byte_neg_mask != 0) { + uint8_t firstlast[2] = {0, 0}; + if (first_byte_neg_mask != 0) firstlast[0] = string_value[start_byte] & first_byte_neg_mask; + if (last_byte_neg_mask != 0) firstlast[1] = string_value[stop_byte] & last_byte_neg_mask; + *cnt -= util::RawPopcount(firstlast, 2); } + return rocksdb::Status::OK(); } rocksdb::Status BitmapString::BitPos(const std::string &raw_value, bool bit, int64_t start, int64_t stop, bool stop_given, int64_t *pos) { - auto string_value = raw_value.substr(Metadata::GetOffsetAfterExpire(raw_value[0])); + std::string_view string_value = std::string_view{raw_value}.substr(Metadata::GetOffsetAfterExpire(raw_value[0])); auto strlen = static_cast<int64_t>(string_value.size()); - /* Convert negative indexes */ - if (start < 0) start = strlen + start; - if (stop < 0) stop = strlen + stop; - if (start < 0) start = 0; - if (stop < 0) stop = 0; - if (stop >= strlen) stop = strlen - 1; + /* Convert negative and out-of-bound indexes */ + std::tie(start, stop) = NormalizeRange(start, stop, strlen); if (start > stop) { *pos = -1; } else { int64_t bytes = stop - start + 1; - *pos = RawBitpos(reinterpret_cast<const uint8_t *>(string_value.data()) + start, bytes, bit); + *pos = util::msb::RawBitpos(reinterpret_cast<const uint8_t *>(string_value.data()) + start, bytes, bit); /* If we are looking for clear bits, and the user specified an exact * range with start-end, we can't consider the right of the range as *
@@ -124,85 +128,27 @@ rocksdb::Status BitmapString::BitPos(const std::string &raw_value, bool bit, int return rocksdb::Status::OK(); } -/* Count number of bits set in the binary array pointed by 's' and long - * 'count' bytes. The implementation of this function is required to - * work with a input string length up to 512 MB.
- * */ -size_t BitmapString::RawPopcount(const uint8_t *p, int64_t count) { - size_t bits = 0; - - for (; count >= 8; p += 8, count -= 8) { - bits += __builtin_popcountll(*reinterpret_cast<const uint64_t *>(p)); - } - - if (count > 0) { - uint64_t v = 0; - __builtin_memcpy(&v, p, count); - bits += __builtin_popcountll(v); - } - - return bits; +std::pair<int64_t, int64_t> BitmapString::NormalizeRange(int64_t origin_start, int64_t origin_end, int64_t length) { + if (origin_start < 0) origin_start = length + origin_start; + if (origin_end < 0) origin_end = length + origin_end; + if (origin_start < 0) origin_start = 0; + if (origin_end < 0) origin_end = 0; + if (origin_end >= length) origin_end = length - 1; + return {origin_start, origin_end}; } -template -inline int ClzllWithEndian(uint64_t x) { - if constexpr (IsLittleEndian()) { - return __builtin_clzll(__builtin_bswap64(x)); - } else if constexpr (IsBigEndian()) { - return __builtin_clzll(x); - } else { - static_assert(AlwaysFalse); +std::pair<int64_t, int64_t> BitmapString::NormalizeToByteRangeWithPaddingMask(bool is_bit, int64_t origin_start, + int64_t origin_end, + uint8_t *first_byte_neg_mask, + uint8_t *last_byte_neg_mask) { + DCHECK(origin_start <= origin_end); + if (is_bit) { + *first_byte_neg_mask = ~((1 << (8 - (origin_start & 7))) - 1) & 0xFF; + *last_byte_neg_mask = (1 << (7 - (origin_end & 7))) - 1; + origin_start >>= 3; + origin_end >>= 3; } -} - -/* Return the position of the first bit set to one (if 'bit' is 1) or - * zero (if 'bit' is 0) in the bitmap starting at 's' and long 'count' bytes. - * - * The function is guaranteed to return a value >= 0 if 'bit' is 0 since if - * no zero bit is found, it returns count*8 assuming the string is zero - * padded on the right. However if 'bit' is 1 it is possible that there is - * not a single set bit in the bitmap. In this special case -1 is returned. - * */ -int64_t BitmapString::RawBitpos(const uint8_t *c, int64_t count, bool bit) { - int64_t res = 0; - - if (bit) { - int64_t ct = count; - - for (; count >= 8; c += 8, count -= 8) { - uint64_t x = *reinterpret_cast<const uint64_t *>(c); - if (x != 0) { - return res + ClzllWithEndian(x); - } - res += 64; - } - - if (count > 0) { - uint64_t v = 0; - __builtin_memcpy(&v, c, count); - res += v == 0 ? count * 8 : ClzllWithEndian(v); - } - - if (res == ct * 8) { - return -1; - } - } else { - for (; count >= 8; c += 8, count -= 8) { - uint64_t x = *reinterpret_cast<const uint64_t *>(c); - if (x != (uint64_t)-1) { - return res + ClzllWithEndian(~x); - } - res += 64; - } - - if (count > 0) { - uint64_t v = -1; - __builtin_memcpy(&v, c, count); - res += v == (uint64_t)-1 ? count * 8 : ClzllWithEndian(~v); - } - } - - return res; + return {origin_start, origin_end}; } rocksdb::Status BitmapString::Bitfield(const Slice &ns_key, std::string *raw_value,
diff --git a/src/types/redis_bitmap_string.h b/src/types/redis_bitmap_string.h index ab61c211bfa..7997165afa3 100644 --- a/src/types/redis_bitmap_string.h +++ b/src/types/redis_bitmap_string.h
@@ -30,12 +30,14 @@ namespace redis { +// BitmapString handles bits using MSB-first numbering (one flavor of bit-endianness).
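To contrast the two numbering schemes, here is a tiny illustration with hypothetical helpers (these are not the util::msb API; they only mimic its behavior); the class declaration continues right after the sketch:

```cpp
#include <cassert>
#include <cstdint>

// MSB-first: offset 0 addresses the top bit of byte 0 (string bit commands).
inline bool GetBitMsb(const uint8_t *p, uint32_t off) { return (p[off >> 3] >> (7 - (off & 7))) & 1; }
// LSB-first: offset 0 addresses the bottom bit of byte 0 (the Bitmap type).
inline bool GetBitLsb(const uint8_t *p, uint32_t off) { return (p[off >> 3] >> (off & 7)) & 1; }

int main() {
  const uint8_t b = 0x80;     // binary 10000000
  assert(GetBitMsb(&b, 0));   // MSB numbering sees the set bit at offset 0
  assert(!GetBitLsb(&b, 0));  // LSB numbering sees offset 0 as clear
  assert(GetBitLsb(&b, 7));   // ... and finds the set bit at offset 7
  return 0;
}
```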
class BitmapString : public Database { public: BitmapString(engine::Storage *storage, const std::string &ns) : Database(storage, ns) {} - static rocksdb::Status GetBit(const std::string &raw_value, uint32_t offset, bool *bit); - rocksdb::Status SetBit(const Slice &ns_key, std::string *raw_value, uint32_t offset, bool new_bit, bool *old_bit); - static rocksdb::Status BitCount(const std::string &raw_value, int64_t start, int64_t stop, uint32_t *cnt); + static rocksdb::Status GetBit(const std::string &raw_value, uint32_t bit_offset, bool *bit); + rocksdb::Status SetBit(const Slice &ns_key, std::string *raw_value, uint32_t bit_offset, bool new_bit, bool *old_bit); + static rocksdb::Status BitCount(const std::string &raw_value, int64_t start, int64_t stop, bool is_bit_index, + uint32_t *cnt); static rocksdb::Status BitPos(const std::string &raw_value, bool bit, int64_t start, int64_t stop, bool stop_given, int64_t *pos); rocksdb::Status Bitfield(const Slice &ns_key, std::string *raw_value, const std::vector<BitfieldOperation> &ops,
@@ -44,8 +46,30 @@ class BitmapString : public Database { const std::vector<BitfieldOperation> &ops, std::vector<std::optional<BitfieldValue>> *rets); - static size_t RawPopcount(const uint8_t *p, int64_t count); - static int64_t RawBitpos(const uint8_t *c, int64_t count, bool bit); + // NormalizeRange converts a range to a normalized range, i.e. one with start and stop in [0, length). + // + // If start/end is negative, length is added to it; if the result is still negative, it is clamped to 0. + // If end is not less than length, it is clamped to length - 1. Note that start is not clamped from above, + // so callers still have to check for start > end. + // + // Return: + // The normalized [start, end] range. + static std::pair<int64_t, int64_t> NormalizeRange(int64_t origin_start, int64_t origin_end, int64_t length); + + // NormalizeToByteRangeWithPaddingMask converts an input index range to a normalized byte index range. + // If is_bit_index is false, it does nothing. + // If is_bit_index is true, it converts the bit index range to a normalized byte index range and + // fills the first-byte and last-byte negative masks. + // For example, if the start bit is the third bit of the first byte, as in '00010000', the first_byte_neg_mask will + // be set to '11100000'; if the end bit is the fifth bit of the last byte, as in '00000100', the last_byte_neg_mask + // will be set to '00000011'.
+ // + // Return: + // The normalized [start_byte, stop_byte] + static std::pair<int64_t, int64_t> NormalizeToByteRangeWithPaddingMask(bool is_bit_index, int64_t origin_start, + int64_t origin_end, + uint8_t *first_byte_neg_mask, + uint8_t *last_byte_neg_mask); }; } // namespace redis
diff --git a/src/types/redis_hash.h b/src/types/redis_hash.h index fc004ed840d..8ae0a066cce 100644 --- a/src/types/redis_hash.h +++ b/src/types/redis_hash.h
@@ -66,6 +66,8 @@ class Hash : public SubKeyScanner { private: rocksdb::Status GetMetadata(const Slice &ns_key, HashMetadata *metadata); + + friend struct FieldValueRetriever; }; } // namespace redis
diff --git a/src/types/redis_json.h b/src/types/redis_json.h index a2135b756ee..8d0f15cb6dc 100644 --- a/src/types/redis_json.h +++ b/src/types/redis_json.h
@@ -76,6 +76,8 @@ class Json : public Database { rocksdb::Status numop(JsonValue::NumOpEnum op, const std::string &user_key, const std::string &path, const std::string &value, JsonValue *result); std::vector readMulti(const std::vector &ns_keys, std::vector &values); + + friend struct FieldValueRetriever; }; } // namespace redis
diff --git a/src/types/redis_stream.cc b/src/types/redis_stream.cc index 48fe7928c45..b03dc378ba9 100644 --- a/src/types/redis_stream.cc +++ b/src/types/redis_stream.cc
@@ -250,7 +250,7 @@ StreamConsumerMetadata Stream::decodeStreamConsumerMetadataValue(const std::stri return consumer_metadata; } -StreamSubkeyType Stream::identifySubkeyType(const rocksdb::Slice &key) { +StreamSubkeyType Stream::identifySubkeyType(const rocksdb::Slice &key) const { InternalKey ikey(key, storage_->IsSlotIdEncoded()); Slice subkey = ikey.GetSubKey(); const size_t entry_id_size = sizeof(StreamEntryID);
@@ -618,7 +618,9 @@ rocksdb::Status Stream::Len(const Slice &stream_name, const StreamLenOptions &op } for (; iter->Valid(); options.to_first ? iter->Prev() : iter->Next()) { - *size += 1; + if (identifySubkeyType(iter->key()) == StreamSubkeyType::StreamEntry) { + *size += 1; + } } return rocksdb::Status::OK();
@@ -674,6 +676,9 @@ rocksdb::Status Stream::range(const std::string &ns_key, const StreamMetadata &m for (; iter->Valid() && (options.reverse ? iter->key().ToString() >= end_key : iter->key().ToString() <= end_key); options.reverse ?
iter->Prev() : iter->Next()) { + if (identifySubkeyType(iter->key()) != StreamSubkeyType::StreamEntry) { + continue; + } if (options.exclude_start && iter->key().ToString() == start_key) { continue; } diff --git a/src/types/redis_stream.h b/src/types/redis_stream.h index 8ae5a14dbad..8f6367a7196 100644 --- a/src/types/redis_stream.h +++ b/src/types/redis_stream.h @@ -82,7 +82,7 @@ class Stream : public SubKeyScanner { std::string consumerNameFromInternalKey(rocksdb::Slice key) const; static std::string encodeStreamConsumerMetadataValue(const StreamConsumerMetadata &consumer_metadata); static StreamConsumerMetadata decodeStreamConsumerMetadataValue(const std::string &value); - StreamSubkeyType identifySubkeyType(const rocksdb::Slice &key); + StreamSubkeyType identifySubkeyType(const rocksdb::Slice &key) const; }; } // namespace redis diff --git a/tests/cppunit/test_base.h b/tests/cppunit/test_base.h index 6d9d6a3957c..16b7837bb49 100644 --- a/tests/cppunit/test_base.h +++ b/tests/cppunit/test_base.h @@ -28,9 +28,13 @@ #include "storage/redis_db.h" #include "types/redis_hash.h" -class TestBase : public testing::Test { // NOLINT +class TestFixture { // NOLINT + public: + TestFixture(TestFixture &&) = delete; + TestFixture(const TestFixture &) = delete; + protected: - explicit TestBase() { + explicit TestFixture() { const char *path = "test.conf"; unlink(path); std::ofstream output_file(path, std::ios::out); @@ -48,7 +52,7 @@ class TestBase : public testing::Test { // NOLINT assert(s.IsOK()); } } - ~TestBase() override { + ~TestFixture() { storage_.reset(); std::error_code ec; @@ -66,4 +70,7 @@ class TestBase : public testing::Test { // NOLINT std::vector fields_; std::vector values_; }; + +class TestBase : public TestFixture, public ::testing::Test {}; + #endif // KVROCKS_TEST_BASE_H diff --git a/tests/cppunit/types/bitmap_test.cc b/tests/cppunit/types/bitmap_test.cc index 4742f01105d..4795e476ad0 100644 --- a/tests/cppunit/types/bitmap_test.cc +++ b/tests/cppunit/types/bitmap_test.cc @@ -26,7 +26,7 @@ #include "types/redis_bitmap.h" #include "types/redis_string.h" -class RedisBitmapTest : public TestBase { +class RedisBitmapTest : public TestFixture, public ::testing::TestWithParam { protected: explicit RedisBitmapTest() { bitmap_ = std::make_unique(storage_.get(), "bitmap_ns"); @@ -34,14 +34,25 @@ class RedisBitmapTest : public TestBase { } ~RedisBitmapTest() override = default; - void SetUp() override { key_ = "test_bitmap_key"; } - void TearDown() override {} + void SetUp() override { + key_ = "test_bitmap_key"; + if (bool use_bitmap = GetParam(); !use_bitmap) { + // Set an empty string. 
+ string_->Set(key_, ""); + } + } + void TearDown() override { + [[maybe_unused]] auto s = bitmap_->Del(key_); + s = string_->Del(key_); + } std::unique_ptr bitmap_; std::unique_ptr string_; }; -TEST_F(RedisBitmapTest, GetAndSetBit) { +INSTANTIATE_TEST_SUITE_P(UseBitmap, RedisBitmapTest, testing::Values(true, false)); + +TEST_P(RedisBitmapTest, GetAndSetBit) { uint32_t offsets[] = {0, 123, 1024 * 8, 1024 * 8 + 1, 3 * 1024 * 8, 3 * 1024 * 8 + 1}; for (const auto &offset : offsets) { bool bit = false; @@ -54,33 +65,134 @@ TEST_F(RedisBitmapTest, GetAndSetBit) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, BitCount) { +TEST_P(RedisBitmapTest, BitCount) { uint32_t offsets[] = {0, 123, 1024 * 8, 1024 * 8 + 1, 3 * 1024 * 8, 3 * 1024 * 8 + 1}; for (const auto &offset : offsets) { bool bit = false; bitmap_->SetBit(key_, offset, true, &bit); } uint32_t cnt = 0; - bitmap_->BitCount(key_, 0, 4 * 1024, &cnt); + bitmap_->BitCount(key_, 0, 4 * 1024, false, &cnt); + EXPECT_EQ(cnt, 6); + bitmap_->BitCount(key_, 0, -1, false, &cnt); + EXPECT_EQ(cnt, 6); + auto s = bitmap_->Del(key_); +} + +TEST_P(RedisBitmapTest, BitCountNegative) { + { + bool bit = false; + bitmap_->SetBit(key_, 0, true, &bit); + EXPECT_FALSE(bit); + } + uint32_t cnt = 0; + bitmap_->BitCount(key_, 0, 4 * 1024, false, &cnt); + EXPECT_EQ(cnt, 1); + bitmap_->BitCount(key_, 0, 0, false, &cnt); + EXPECT_EQ(cnt, 1); + bitmap_->BitCount(key_, 0, -1, false, &cnt); + EXPECT_EQ(cnt, 1); + bitmap_->BitCount(key_, -1, -1, false, &cnt); + EXPECT_EQ(cnt, 1); + bitmap_->BitCount(key_, 1, 1, false, &cnt); + EXPECT_EQ(cnt, 0); + bitmap_->BitCount(key_, -10000, -10000, false, &cnt); + EXPECT_EQ(cnt, 1); + + { + bool bit = false; + bitmap_->SetBit(key_, 5, true, &bit); + EXPECT_FALSE(bit); + } + bitmap_->BitCount(key_, -10000, -10000, false, &cnt); + EXPECT_EQ(cnt, 2); + + { + bool bit = false; + bitmap_->SetBit(key_, 8 * 1024 - 1, true, &bit); + EXPECT_FALSE(bit); + bitmap_->SetBit(key_, 8 * 1024, true, &bit); + EXPECT_FALSE(bit); + } + + bitmap_->BitCount(key_, 0, 1024, false, &cnt); + EXPECT_EQ(cnt, 4); + + bitmap_->BitCount(key_, 0, 1023, false, &cnt); + EXPECT_EQ(cnt, 3); + + auto s = bitmap_->Del(key_); +} + +TEST_P(RedisBitmapTest, BitCountBITOption) { + std::set offsets = {0, 100, 1024 * 8, 1024 * 8 + 1, 3 * 1024 * 8, 3 * 1024 * 8 + 1}; + for (const auto &offset : offsets) { + bool bit = false; + bitmap_->SetBit(key_, offset, true, &bit); + } + + for (uint32_t bit_offset = 0; bit_offset <= 3 * 1024 * 8 + 10; ++bit_offset) { + uint32_t cnt = 0; + EXPECT_TRUE(bitmap_->BitCount(key_, bit_offset, bit_offset, true, &cnt).ok()); + if (offsets.count(bit_offset) > 0) { + ASSERT_EQ(1, cnt) << "bit_offset: " << bit_offset; + } else { + ASSERT_EQ(0, cnt) << "bit_offset: " << bit_offset; + } + } + + uint32_t cnt = 0; + bitmap_->BitCount(key_, 0, 4 * 1024 * 8, true, &cnt); EXPECT_EQ(cnt, 6); - bitmap_->BitCount(key_, 0, -1, &cnt); + bitmap_->BitCount(key_, 0, -1, true, &cnt); EXPECT_EQ(cnt, 6); + bitmap_->BitCount(key_, 0, 3 * 1024 * 8 + 1, true, &cnt); + EXPECT_EQ(cnt, 6); + bitmap_->BitCount(key_, 1, 3 * 1024 * 8 + 1, true, &cnt); + EXPECT_EQ(cnt, 5); + bitmap_->BitCount(key_, 0, 0, true, &cnt); + EXPECT_EQ(cnt, 1); + bitmap_->BitCount(key_, 0, 100, true, &cnt); + EXPECT_EQ(cnt, 2); + bitmap_->BitCount(key_, 100, 1024 * 8, true, &cnt); + EXPECT_EQ(cnt, 2); + bitmap_->BitCount(key_, 100, 3 * 1024 * 8, true, &cnt); + EXPECT_EQ(cnt, 4); + bitmap_->BitCount(key_, -1, -1, true, &cnt); + EXPECT_EQ(cnt, 0); // NOTICE: the min storage unit is byte, the 
result is the same as Redis. auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, BitPosClearBit) { +TEST_P(RedisBitmapTest, BitPosClearBit) { int64_t pos = 0; bool old_bit = false; + bool use_bitmap = GetParam(); for (int i = 0; i < 1024 + 16; i++) { - bitmap_->BitPos(key_, false, 0, -1, true, &pos); - EXPECT_EQ(pos, i); + /// ``` + /// redis> set k1 "" + /// "OK" + /// redis> bitpos k1 0 + /// (integer) -1 + /// redis> bitpos k2 0 + /// (integer) 0 + /// ``` + /// + /// The string variant is initialized with an empty value in SetUp, so the + /// first query should return -1. + bitmap_->BitPos(key_, false, 0, -1, /*stop_given=*/false, &pos); + if (i == 0 && !use_bitmap) { + EXPECT_EQ(pos, -1); + } else { + EXPECT_EQ(pos, i); + } + bitmap_->SetBit(key_, i, true, &old_bit); EXPECT_FALSE(old_bit); } auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, BitPosSetBit) { +TEST_P(RedisBitmapTest, BitPosSetBit) { uint32_t offsets[] = {0, 123, 1024 * 8, 1024 * 8 + 16, 3 * 1024 * 8, 3 * 1024 * 8 + 16}; for (const auto &offset : offsets) { bool bit = false;
@@ -95,7 +207,61 @@ TEST_F(RedisBitmapTest, BitPosSetBit) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, BitfieldGetSetTest) { +TEST_P(RedisBitmapTest, BitPosNegative) { + { + bool bit = false; + bitmap_->SetBit(key_, 8 * 1024 - 1, true, &bit); + EXPECT_FALSE(bit); + } + int64_t pos = 0; + // The first bit is clear (0). + bitmap_->BitPos(key_, false, 0, -1, true, &pos); + EXPECT_EQ(0, pos); + // Bit 8 * 1024 - 1 is set (1). + bitmap_->BitPos(key_, true, 0, -1, true, &pos); + EXPECT_EQ(8 * 1024 - 1, pos); + // The first bit in byte 1023 is clear. + bitmap_->BitPos(key_, false, -1, -1, true, &pos); + EXPECT_EQ(8 * 1023, pos); + // The last bit in byte 1023 is set. + bitmap_->BitPos(key_, true, -1, -1, true, &pos); + EXPECT_EQ(8 * 1024 - 1, pos); + // Large negative indexes are clamped to 0 by normalization. + bitmap_->BitPos(key_, false, -10000, -10000, true, &pos); + EXPECT_EQ(0, pos); + + auto s = bitmap_->Del(key_); +} + +// When `stop_given` is true, even when searching for 0, +// we cannot exceed the stop position; the sketch and the test below make this concrete.
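A toy model of that rule follows (invented names; the real BitPos works on byte ranges over RocksDB-backed values). It shows why an explicit stop turns an all-ones range into -1, while the stop-less form falls through to the zero-padding assumption:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Scan [start_bit, stop_bit] for the first clear bit, MSB-first within bytes.
int64_t BitPosClear(const std::vector<uint8_t> &data, int64_t start_bit, int64_t stop_bit, bool stop_given) {
  for (int64_t i = start_bit; i <= stop_bit; i++) {
    if (((data[i >> 3] >> (7 - (i & 7))) & 1) == 0) return i;
  }
  // Without an explicit stop, the value is treated as zero-padded on the
  // right, so the first clear bit is just past the end; with an explicit
  // stop, nothing was found.
  return stop_given ? -1 : stop_bit + 1;
}

int main() {
  const std::vector<uint8_t> data = {0xff};  // bits 0..7 set, as in the test below
  assert(BitPosClear(data, 0, 7, /*stop_given=*/true) == -1);
  assert(BitPosClear(data, 0, 7, /*stop_given=*/false) == 8);
  return 0;
}
```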
+TEST_P(RedisBitmapTest, BitPosStopGiven) { + for (int i = 0; i < 8; ++i) { + bool bit = true; + bitmap_->SetBit(key_, i, true, &bit); + EXPECT_FALSE(bit); + } + int64_t pos = 0; + bitmap_->BitPos(key_, false, 0, 0, /*stop_given=*/true, &pos); + EXPECT_EQ(-1, pos); + bitmap_->BitPos(key_, false, 0, 0, /*stop_given=*/false, &pos); + EXPECT_EQ(8, pos); + + // Setting a bit at offset 8 does not affect the results above. + { + bool bit = true; + bitmap_->SetBit(key_, 8, true, &bit); + EXPECT_FALSE(bit); + } + bitmap_->BitPos(key_, false, 0, 0, /*stop_given=*/true, &pos); + EXPECT_EQ(-1, pos); + bitmap_->BitPos(key_, false, 0, 1, /*stop_given=*/false, &pos); + EXPECT_EQ(9, pos); + + auto s = bitmap_->Del(key_); +} + +TEST_P(RedisBitmapTest, BitfieldGetSetTest) { constexpr uint32_t magic = 0xdeadbeef; std::vector<std::optional<BitfieldValue>> rets;
@@ -125,7 +291,7 @@ TEST_F(RedisBitmapTest, BitfieldGetSetTest) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, UnsignedBitfieldTest) { +TEST_P(RedisBitmapTest, UnsignedBitfieldTest) { constexpr uint8_t bits = 5; static_assert(bits < 64); constexpr uint64_t max = (uint64_t(1) << bits) - 1;
@@ -154,7 +320,7 @@ TEST_F(RedisBitmapTest, UnsignedBitfieldTest) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, SignedBitfieldTest) { +TEST_P(RedisBitmapTest, SignedBitfieldTest) { constexpr uint8_t bits = 10; constexpr int64_t max = (uint64_t(1) << (bits - 1)) - 1; constexpr int64_t min = -max - 1;
@@ -182,7 +348,7 @@ TEST_F(RedisBitmapTest, SignedBitfieldTest) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, SignedBitfieldWrapSetTest) { +TEST_P(RedisBitmapTest, SignedBitfieldWrapSetTest) { constexpr uint8_t bits = 6; constexpr int64_t max = (int64_t(1) << (bits - 1)) - 1; constexpr int64_t min = -max - 1;
@@ -217,7 +383,7 @@ TEST_F(RedisBitmapTest, SignedBitfieldWrapSetTest) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, UnsignedBitfieldWrapSetTest) { +TEST_P(RedisBitmapTest, UnsignedBitfieldWrapSetTest) { constexpr uint8_t bits = 6; static_assert(bits < 64); constexpr uint64_t max = (uint64_t(1) << bits) - 1;
@@ -252,7 +418,7 @@ TEST_F(RedisBitmapTest, UnsignedBitfieldWrapSetTest) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, SignedBitfieldSatSetTest) { +TEST_P(RedisBitmapTest, SignedBitfieldSatSetTest) { constexpr uint8_t bits = 6; constexpr int64_t max = (int64_t(1) << (bits - 1)) - 1;
@@ -288,7 +454,7 @@ TEST_F(RedisBitmapTest, SignedBitfieldSatSetTest) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, UnsignedBitfieldSatSetTest) { +TEST_P(RedisBitmapTest, UnsignedBitfieldSatSetTest) { constexpr uint8_t bits = 6; static_assert(bits < 64); constexpr uint64_t max = (uint64_t(1) << bits) - 1;
@@ -325,7 +491,7 @@ TEST_F(RedisBitmapTest, UnsignedBitfieldSatSetTest) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, SignedBitfieldFailSetTest) { +TEST_P(RedisBitmapTest, SignedBitfieldFailSetTest) { constexpr uint8_t bits = 5; constexpr int64_t max = (int64_t(1) << (bits - 1)) - 1;
@@ -361,7 +527,7 @@ TEST_F(RedisBitmapTest, SignedBitfieldFailSetTest) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, UnsignedBitfieldFailSetTest) { +TEST_P(RedisBitmapTest, UnsignedBitfieldFailSetTest) { constexpr uint8_t bits = 5; constexpr int64_t max = (int64_t(1) << bits) - 1;
@@ -397,7 +563,10 @@ TEST_F(RedisBitmapTest, UnsignedBitfieldFailSetTest) { auto s = bitmap_->Del(key_); } -TEST_F(RedisBitmapTest, BitfieldStringGetSetTest) { +TEST_P(RedisBitmapTest, BitfieldStringGetSetTest) { + if (bool use_bitmap = GetParam(); use_bitmap) { + GTEST_SKIP() << "skip bitmap
test for BitfieldStringGetSetTest"; + } std::string str = "dan yuan ren chang jiu, qian li gong chan juan."; string_->Set(key_, str); diff --git a/tests/gocase/integration/slotmigrate/slotmigrate_test.go b/tests/gocase/integration/slotmigrate/slotmigrate_test.go index afae86d1869..6901f5f2d08 100644 --- a/tests/gocase/integration/slotmigrate/slotmigrate_test.go +++ b/tests/gocase/integration/slotmigrate/slotmigrate_test.go @@ -35,6 +35,7 @@ import ( type SlotMigrationState string type SlotImportState string +type SlotMigrationType string const ( SlotMigrationStateStarted SlotMigrationState = "start" @@ -43,8 +44,13 @@ const ( SlotImportStateSuccess SlotImportState = "success" SlotImportStateFailed SlotImportState = "error" + + MigrationTypeRedisCommand SlotMigrationType = "redis-command" + MigrationTypeRawKeyValue SlotMigrationType = "raw-key-value" ) +var testSlot = 0 + func TestSlotMigrateFromSlave(t *testing.T) { ctx := context.Background() @@ -530,10 +536,13 @@ func TestSlotMigrateDataType(t *testing.T) { require.EqualValues(t, cnt, rdb1.LLen(ctx, util.SlotTable[slot]).Val()) }) - migrateAllTypes := func(t *testing.T, sync bool, slot int) { + migrateAllTypes := func(t *testing.T, migrateType SlotMigrationType, sync bool) { + require.NoError(t, rdb0.ConfigSet(ctx, "migrate-type", string(migrateType)).Err()) + + testSlot += 1 keys := make(map[string]string, 0) for _, typ := range []string{"string", "expired_string", "list", "hash", "set", "zset", "bitmap", "sortint", "stream"} { - keys[typ] = fmt.Sprintf("%s_{%s}", typ, util.SlotTable[slot]) + keys[typ] = fmt.Sprintf("%s_{%s}", typ, util.SlotTable[testSlot]) require.NoError(t, rdb0.Del(ctx, keys[typ]).Err()) } // type string @@ -604,12 +613,11 @@ func TestSlotMigrateDataType(t *testing.T) { require.EqualValues(t, "0-0", streamInfo.MaxDeletedEntryID) require.EqualValues(t, 19, streamInfo.Length) - // migrate slot 1, all keys above are belong to slot 1 if !sync { - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot, id1).Val()) - waitForMigrateState(t, rdb0, slot, SlotMigrationStateSuccess) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) + waitForMigrateState(t, rdb0, testSlot, SlotMigrationStateSuccess) } else { - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot, id1, "sync").Val()) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1, "sync").Val()) } // check destination data @@ -657,19 +665,12 @@ func TestSlotMigrateDataType(t *testing.T) { } } - t.Run("MIGRATE - Slot migrate all types of existing data", func(t *testing.T) { - migrateAllTypes(t, false, 1) - }) - - t.Run("MIGRATE - Slot migrate all types of existing data (sync)", func(t *testing.T) { - migrateAllTypes(t, true, 2) - }) - - t.Run("MIGRATE - increment sync stream from WAL", func(t *testing.T) { - slot := 40 + migrateIncrementalStream := func(t *testing.T, migrateType SlotMigrationType) { + require.NoError(t, rdb0.ConfigSet(ctx, "migrate-type", string(migrateType)).Err()) + testSlot += 1 keys := make(map[string]string, 0) for _, typ := range []string{"stream"} { - keys[typ] = fmt.Sprintf("%s_{%s}", typ, util.SlotTable[slot]) + keys[typ] = fmt.Sprintf("%s_{%s}", typ, util.SlotTable[testSlot]) require.NoError(t, rdb0.Del(ctx, keys[typ]).Err()) } for i := 1; i < 1000; i++ { @@ -691,7 +692,7 @@ func TestSlotMigrateDataType(t *testing.T) { defer func() { require.NoError(t, rdb0.ConfigSet(ctx, "migrate-speed", "4096").Err()) }() - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot, 
id1).Val()) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) newStreamID := "1001" require.NoError(t, rdb0.XAdd(ctx, &redis.XAddArgs{ Stream: keys["stream"], @@ -700,18 +701,20 @@ func TestSlotMigrateDataType(t *testing.T) { }).Err()) require.NoError(t, rdb0.XDel(ctx, keys["stream"], "1-0").Err()) require.NoError(t, rdb0.Do(ctx, "XSETID", keys["stream"], "1001-0", "MAXDELETEDID", "2-0").Err()) - waitForMigrateStateInDuration(t, rdb0, slot, SlotMigrationStateSuccess, time.Minute) + waitForMigrateStateInDuration(t, rdb0, testSlot, SlotMigrationStateSuccess, time.Minute) streamInfo = rdb1.XInfoStream(ctx, keys["stream"]).Val() require.EqualValues(t, "1001-0", streamInfo.LastGeneratedID) require.EqualValues(t, 1000, streamInfo.EntriesAdded) require.EqualValues(t, "2-0", streamInfo.MaxDeletedEntryID) require.EqualValues(t, 999, streamInfo.Length) - }) + } - t.Run("MIGRATE - Migrating empty stream", func(t *testing.T) { - slot := 31 - key := fmt.Sprintf("stream_{%s}", util.SlotTable[slot]) + migrateEmptyStream := func(t *testing.T, migrateType SlotMigrationType) { + require.NoError(t, rdb0.ConfigSet(ctx, "migrate-type", string(migrateType)).Err()) + + testSlot += 1 + key := fmt.Sprintf("stream_{%s}", util.SlotTable[testSlot]) require.NoError(t, rdb0.Del(ctx, key).Err()) @@ -736,8 +739,8 @@ func TestSlotMigrateDataType(t *testing.T) { require.EqualValues(t, "7-0", originRes.MaxDeletedEntryID) require.EqualValues(t, 0, originRes.Length) - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot, id1).Val()) - waitForMigrateState(t, rdb0, slot, SlotMigrationStateSuccess) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) + waitForMigrateState(t, rdb0, testSlot, SlotMigrationStateSuccess) require.ErrorContains(t, rdb0.Exists(ctx, key).Err(), "MOVED") @@ -748,11 +751,13 @@ func TestSlotMigrateDataType(t *testing.T) { require.EqualValues(t, originRes.EntriesAdded, migratedRes.EntriesAdded) require.EqualValues(t, originRes.MaxDeletedEntryID, migratedRes.MaxDeletedEntryID) require.EqualValues(t, originRes.Length, migratedRes.Length) - }) + } - t.Run("MIGRATE - Migrating stream with deleted entries", func(t *testing.T) { - slot := 32 - key := fmt.Sprintf("stream_{%s}", util.SlotTable[slot]) + migrateStreamWithDeletedEnties := func(t *testing.T, migrateType SlotMigrationType) { + require.NoError(t, rdb0.ConfigSet(ctx, "migrate-type", string(migrateType)).Err()) + + testSlot += 1 + key := fmt.Sprintf("stream_{%s}", util.SlotTable[testSlot]) require.NoError(t, rdb0.Del(ctx, key).Err()) @@ -775,8 +780,8 @@ func TestSlotMigrateDataType(t *testing.T) { require.EqualValues(t, "3-0", originRes.MaxDeletedEntryID) require.EqualValues(t, 3, originRes.Length) - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot, id1).Val()) - waitForMigrateState(t, rdb0, slot, SlotMigrationStateSuccess) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) + waitForMigrateState(t, rdb0, testSlot, SlotMigrationStateSuccess) require.ErrorContains(t, rdb0.Exists(ctx, key).Err(), "MOVED") @@ -787,74 +792,48 @@ func TestSlotMigrateDataType(t *testing.T) { require.EqualValues(t, originRes.EntriesAdded, migratedRes.EntriesAdded) require.EqualValues(t, originRes.MaxDeletedEntryID, migratedRes.MaxDeletedEntryID) require.EqualValues(t, originRes.Length, migratedRes.Length) - }) - - t.Run("MIGRATE - Accessing slot is forbidden on source server but not on destination server", func(t *testing.T) { - slot := 3 - require.NoError(t, 
rdb0.Set(ctx, util.SlotTable[slot], 3, 0).Err()) - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot, id1).Val()) - waitForMigrateState(t, rdb0, slot, SlotMigrationStateSuccess) - require.ErrorContains(t, rdb0.Set(ctx, util.SlotTable[slot], "source-value", 0).Err(), "MOVED") - require.ErrorContains(t, rdb0.Del(ctx, util.SlotTable[slot]).Err(), "MOVED") - require.ErrorContains(t, rdb0.Exists(ctx, util.SlotTable[slot]).Err(), "MOVED") - require.NoError(t, rdb1.Set(ctx, util.SlotTable[slot], "destination-value", 0).Err()) - }) - - t.Run("MIGRATE - Slot isn't forbidden writing when starting migrating", func(t *testing.T) { - slot := 5 - cnt := 20000 - for i := 0; i < cnt; i++ { - require.NoError(t, rdb0.LPush(ctx, util.SlotTable[slot], i).Err()) - } - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot, id1).Val()) - requireMigrateState(t, rdb0, slot, SlotMigrationStateStarted) - // write during migrating - require.EqualValues(t, cnt+1, rdb0.LPush(ctx, util.SlotTable[slot], cnt).Val()) - waitForMigrateState(t, rdb0, slot, SlotMigrationStateSuccess) - require.Equal(t, strconv.Itoa(cnt), rdb1.LPop(ctx, util.SlotTable[slot]).Val()) - }) - - t.Run("MIGRATE - Slot keys are not cleared after migration but cleared after setslot", func(t *testing.T) { - slot := 6 - require.NoError(t, rdb0.Set(ctx, util.SlotTable[slot], "slot6", 0).Err()) - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot, id1).Val()) - waitForMigrateState(t, rdb0, slot, SlotMigrationStateSuccess) - require.Equal(t, "slot6", rdb1.Get(ctx, util.SlotTable[slot]).Val()) - require.Contains(t, rdb0.Keys(ctx, "*").Val(), util.SlotTable[slot]) - require.NoError(t, rdb0.Do(ctx, "clusterx", "setslot", slot, "node", id1, "2").Err()) - require.NotContains(t, rdb0.Keys(ctx, "*").Val(), util.SlotTable[slot]) - }) + } - t.Run("MIGRATE - Migrate incremental data via parsing and filtering data in WAL", func(t *testing.T) { - migratingSlot := 15 + migrateIncrementalData := func(t *testing.T, migrateType SlotMigrationType) { + require.NoError(t, rdb0.ConfigSet(ctx, "migrate-type", string(migrateType)).Err()) + testSlot += 1 + migratingSlot := testSlot + hashtag := util.SlotTable[migratingSlot] keys := []string{ // key for slowing migrate-speed when migrating existing data - util.SlotTable[migratingSlot], - // the following keys belong to slot 15; keys of all the data types (string/hash/set/zset/list/sortint) - "key:000042915392", - "key:000043146202", - "key:000044434182", - "key:000045189446", - "key:000047413016", - "key:000049190069", - "key:000049930003", - "key:000049980785", - "key:000056730838", + hashtag, + fmt.Sprintf("{%s}_key1", hashtag), + fmt.Sprintf("{%s}_key2", hashtag), + fmt.Sprintf("{%s}_key3", hashtag), + fmt.Sprintf("{%s}_key4", hashtag), + fmt.Sprintf("{%s}_key5", hashtag), + fmt.Sprintf("{%s}_key6", hashtag), + fmt.Sprintf("{%s}_key7", hashtag), + fmt.Sprintf("{%s}_key8", hashtag), + fmt.Sprintf("{%s}_key9", hashtag), } for _, key := range keys { require.NoError(t, rdb0.Del(ctx, key).Err()) } - require.NoError(t, rdb0.ConfigSet(ctx, "migrate-speed", "64").Err()) - require.Equal(t, map[string]string{"migrate-speed": "64"}, rdb0.ConfigGet(ctx, "migrate-speed").Val()) + + valuePrefix := "value" + if migrateType == MigrationTypeRedisCommand { + require.NoError(t, rdb0.ConfigSet(ctx, "migrate-speed", "64").Err()) + } else { + // Create enough data + valuePrefix = strings.Repeat("value", 1024) + require.NoError(t, rdb0.ConfigSet(ctx, "migrate-batch-rate-limit-mb", "1").Err()) + } cnt := 2000 for i 
:= 0; i < cnt; i++ { - require.NoError(t, rdb0.LPush(ctx, keys[0], i).Err()) + require.NoError(t, rdb0.LPush(ctx, keys[0], fmt.Sprintf("%s-%d", valuePrefix, i)).Err()) } require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", migratingSlot, id1).Val()) // write key that doesn't belong to this slot - nonMigratingSlot := 12 + testSlot += 1 + nonMigratingSlot := testSlot require.NoError(t, rdb0.Del(ctx, util.SlotTable[nonMigratingSlot]).Err()) require.NoError(t, rdb0.Set(ctx, util.SlotTable[nonMigratingSlot], "non-migrating-value", 0).Err()) @@ -868,12 +847,14 @@ func TestSlotMigrateDataType(t *testing.T) { require.NoError(t, rdb0.SetBit(ctx, keys[3], 10086, 1).Err()) require.NoError(t, rdb0.Expire(ctx, keys[3], 10000*time.Second).Err()) // verify expireat binlog could be parsed - slotWithExpiringKey := nonMigratingSlot + 1 + testSlot += 1 + slotWithExpiringKey := testSlot require.NoError(t, rdb0.Del(ctx, util.SlotTable[slotWithExpiringKey]).Err()) require.NoError(t, rdb0.Set(ctx, util.SlotTable[slotWithExpiringKey], "expiring-value", 0).Err()) require.NoError(t, rdb0.ExpireAt(ctx, util.SlotTable[slotWithExpiringKey], time.Now().Add(100*time.Second)).Err()) // verify del command - slotWithDeletedKey := nonMigratingSlot + 2 + testSlot += 1 + slotWithDeletedKey := testSlot require.NoError(t, rdb0.Set(ctx, util.SlotTable[slotWithDeletedKey], "will-be-deleted", 0).Err()) require.NoError(t, rdb0.Del(ctx, util.SlotTable[slotWithDeletedKey]).Err()) // 2. type hash @@ -912,12 +893,6 @@ func TestSlotMigrateDataType(t *testing.T) { for i := 10000; i < 11000; i += 2 { require.NoError(t, rdb0.SetBit(ctx, keys[8], int64(i), 1).Err()) } - for i := 20000; i < 21000; i += 5 { - res := rdb0.BitField(ctx, keys[8], "SET", "u5", strconv.Itoa(i), 23) - require.NoError(t, res.Err()) - require.EqualValues(t, 1, len(res.Val())) - require.EqualValues(t, 0, res.Val()[0]) - } // 7. type sortint require.NoError(t, rdb0.Do(ctx, "SIADD", keys[9], 2, 4, 1, 3).Err()) require.NoError(t, rdb0.Do(ctx, "SIREM", keys[9], 2).Err()) @@ -963,12 +938,6 @@ func TestSlotMigrateDataType(t *testing.T) { for i := 0; i < 20; i += 2 { require.EqualValues(t, 0, rdb1.GetBit(ctx, keys[8], int64(i)).Val()) } - for i := 20000; i < 21000; i += 5 { - res := rdb1.BitField(ctx, keys[8], "GET", "u5", strconv.Itoa(i)) - require.NoError(t, res.Err()) - require.EqualValues(t, 1, len(res.Val())) - require.EqualValues(t, 23, res.Val()[0]) - } // 7. 
type sortint require.EqualValues(t, siv, rdb1.Do(ctx, "SIRANGE", keys[9], 0, -1).Val()) @@ -976,47 +945,112 @@ func TestSlotMigrateDataType(t *testing.T) { require.Equal(t, "non-migrating-value", rdb0.Get(ctx, util.SlotTable[nonMigratingSlot]).Val()) require.ErrorContains(t, rdb1.Exists(ctx, util.SlotTable[nonMigratingSlot]).Err(), "MOVED") require.EqualValues(t, 0, rdb0.Exists(ctx, util.SlotTable[slotWithDeletedKey]).Val()) + } + + testMigrationTypes := []SlotMigrationType{MigrationTypeRedisCommand, MigrationTypeRawKeyValue} + + for _, testType := range testMigrationTypes { + t.Run(fmt.Sprintf("MIGRATE - Slot migrate all types of existing data using %s", testType), func(t *testing.T) { + migrateAllTypes(t, testType, false) + }) + + t.Run(fmt.Sprintf("MIGRATE - Slot migrate all types of existing data (sync) using %s", testType), func(t *testing.T) { + migrateAllTypes(t, testType, true) + }) + + t.Run(fmt.Sprintf("MIGRATE - increment sync stream from WAL using %s", testType), func(t *testing.T) { + migrateIncrementalStream(t, testType) + }) + + t.Run(fmt.Sprintf("MIGRATE - Migrating empty stream using %s", testType), func(t *testing.T) { + migrateEmptyStream(t, testType) + }) + + t.Run(fmt.Sprintf("MIGRATE - Migrating stream with deleted entries using %s", testType), func(t *testing.T) { + migrateStreamWithDeletedEnties(t, testType) + }) + + t.Run(fmt.Sprintf("MIGRATE - Migrate incremental data via parsing and filtering data in WAL using %s", testType), func(t *testing.T) { + migrateIncrementalData(t, testType) + }) + } + + t.Run("MIGRATE - Accessing slot is forbidden on source server but not on destination server", func(t *testing.T) { + testSlot += 1 + require.NoError(t, rdb0.Set(ctx, util.SlotTable[testSlot], 3, 0).Err()) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) + waitForMigrateState(t, rdb0, testSlot, SlotMigrationStateSuccess) + require.ErrorContains(t, rdb0.Set(ctx, util.SlotTable[testSlot], "source-value", 0).Err(), "MOVED") + require.ErrorContains(t, rdb0.Del(ctx, util.SlotTable[testSlot]).Err(), "MOVED") + require.ErrorContains(t, rdb0.Exists(ctx, util.SlotTable[testSlot]).Err(), "MOVED") + require.NoError(t, rdb1.Set(ctx, util.SlotTable[testSlot], "destination-value", 0).Err()) + }) + + t.Run("MIGRATE - Slot isn't forbidden writing when starting migrating", func(t *testing.T) { + testSlot += 1 + cnt := 20000 + for i := 0; i < cnt; i++ { + require.NoError(t, rdb0.LPush(ctx, util.SlotTable[testSlot], i).Err()) + } + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) + requireMigrateState(t, rdb0, testSlot, SlotMigrationStateStarted) + // write during migrating + require.EqualValues(t, cnt+1, rdb0.LPush(ctx, util.SlotTable[testSlot], cnt).Val()) + waitForMigrateState(t, rdb0, testSlot, SlotMigrationStateSuccess) + require.Equal(t, strconv.Itoa(cnt), rdb1.LPop(ctx, util.SlotTable[testSlot]).Val()) + }) + + t.Run("MIGRATE - Slot keys are not cleared after migration but cleared after setslot", func(t *testing.T) { + testSlot += 1 + require.NoError(t, rdb0.Set(ctx, util.SlotTable[testSlot], "slot6", 0).Err()) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) + waitForMigrateState(t, rdb0, testSlot, SlotMigrationStateSuccess) + require.Equal(t, "slot6", rdb1.Get(ctx, util.SlotTable[testSlot]).Val()) + require.Contains(t, rdb0.Keys(ctx, "*").Val(), util.SlotTable[testSlot]) + require.NoError(t, rdb0.Do(ctx, "clusterx", "setslot", testSlot, "node", id1, "2").Err()) + 
require.NotContains(t, rdb0.Keys(ctx, "*").Val(), util.SlotTable[testSlot]) }) t.Run("MIGRATE - Slow migrate speed", func(t *testing.T) { - slot := 16 + require.NoError(t, rdb0.ConfigSet(ctx, "migrate-type", string(MigrationTypeRedisCommand)).Err()) + testSlot += 1 require.NoError(t, rdb0.ConfigSet(ctx, "migrate-speed", "16").Err()) require.Equal(t, map[string]string{"migrate-speed": "16"}, rdb0.ConfigGet(ctx, "migrate-speed").Val()) - require.NoError(t, rdb0.Del(ctx, util.SlotTable[slot]).Err()) + require.NoError(t, rdb0.Del(ctx, util.SlotTable[testSlot]).Err()) // more than pipeline size(16) and max items(16) in command cnt := 1000 for i := 0; i < cnt; i++ { - require.NoError(t, rdb0.LPush(ctx, util.SlotTable[slot], i).Err()) + require.NoError(t, rdb0.LPush(ctx, util.SlotTable[testSlot], i).Err()) } - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot, id1).Val()) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) // should not finish 1.5s time.Sleep(1500 * time.Millisecond) - requireMigrateState(t, rdb0, slot, SlotMigrationStateStarted) - waitForMigrateState(t, rdb0, slot, SlotMigrationStateSuccess) + requireMigrateState(t, rdb0, testSlot, SlotMigrationStateStarted) + waitForMigrateState(t, rdb0, testSlot, SlotMigrationStateSuccess) }) t.Run("MIGRATE - Data of migrated slot can't be written to source but can be written to destination", func(t *testing.T) { - slot := 17 - require.NoError(t, rdb0.Del(ctx, util.SlotTable[slot]).Err()) + testSlot += 1 + require.NoError(t, rdb0.Del(ctx, util.SlotTable[testSlot]).Err()) cnt := 100 for i := 0; i < cnt; i++ { - require.NoError(t, rdb0.LPush(ctx, util.SlotTable[slot], i).Err()) + require.NoError(t, rdb0.LPush(ctx, util.SlotTable[testSlot], i).Err()) } - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot, id1).Val()) - waitForMigrateState(t, rdb0, slot, SlotMigrationStateSuccess) - require.EqualValues(t, cnt, rdb1.LLen(ctx, util.SlotTable[slot]).Val()) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) + waitForMigrateState(t, rdb0, testSlot, SlotMigrationStateSuccess) + require.EqualValues(t, cnt, rdb1.LLen(ctx, util.SlotTable[testSlot]).Val()) // write the migrated slot to source server - k := fmt.Sprintf("{%s}_1", util.SlotTable[slot]) + k := fmt.Sprintf("{%s}_1", util.SlotTable[testSlot]) require.ErrorContains(t, rdb0.Set(ctx, k, "slot17_value1", 0).Err(), "MOVED") // write the migrated slot to destination server require.NoError(t, rdb1.Set(ctx, k, "slot17_value1", 0).Err()) }) t.Run("MIGRATE - LMOVE (src and dst are different) via parsing WAL logs", func(t *testing.T) { - slot1 := 18 + testSlot += 1 - srcListName := fmt.Sprintf("list_src_{%s}", util.SlotTable[slot1]) - dstListName := fmt.Sprintf("list_dst_{%s}", util.SlotTable[slot1]) + srcListName := fmt.Sprintf("list_src_{%s}", util.SlotTable[testSlot]) + dstListName := fmt.Sprintf("list_dst_{%s}", util.SlotTable[testSlot]) require.NoError(t, rdb0.Del(ctx, srcListName).Err()) require.NoError(t, rdb0.Del(ctx, dstListName).Err()) @@ -1030,13 +1064,13 @@ func TestSlotMigrateDataType(t *testing.T) { require.NoError(t, rdb0.RPush(ctx, srcListName, fmt.Sprintf("element%d", i)).Err()) } - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot1, id1).Val()) - requireMigrateState(t, rdb0, slot1, SlotMigrationStateStarted) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) + requireMigrateState(t, rdb0, testSlot, SlotMigrationStateStarted) for i := 0; i < 10; i++ { 
require.NoError(t, rdb0.LMove(ctx, srcListName, dstListName, "RIGHT", "LEFT").Err()) } - waitForMigrateState(t, rdb0, slot1, SlotMigrationStateSuccess) + waitForMigrateState(t, rdb0, testSlot, SlotMigrationStateSuccess) require.ErrorContains(t, rdb0.RPush(ctx, srcListName, "element1000").Err(), "MOVED") require.Equal(t, int64(10), rdb1.LLen(ctx, dstListName).Val()) @@ -1048,9 +1082,9 @@ func TestSlotMigrateDataType(t *testing.T) { }) t.Run("MIGRATE - LMOVE (src and dst are the same) via parsing WAL logs", func(t *testing.T) { - slot1 := 19 + testSlot += 1 - srcListName := fmt.Sprintf("list_src_{%s}", util.SlotTable[slot1]) + srcListName := fmt.Sprintf("list_src_{%s}", util.SlotTable[testSlot]) require.NoError(t, rdb0.Del(ctx, srcListName).Err()) @@ -1065,13 +1099,13 @@ func TestSlotMigrateDataType(t *testing.T) { require.NoError(t, rdb0.RPush(ctx, srcListName, fmt.Sprintf("element%d", i)).Err()) } - require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", slot1, id1).Val()) - requireMigrateState(t, rdb0, slot1, SlotMigrationStateStarted) + require.Equal(t, "OK", rdb0.Do(ctx, "clusterx", "migrate", testSlot, id1).Val()) + requireMigrateState(t, rdb0, testSlot, SlotMigrationStateStarted) for i := 0; i < 10; i++ { require.NoError(t, rdb0.LMove(ctx, srcListName, srcListName, "RIGHT", "LEFT").Err()) } - waitForMigrateState(t, rdb0, slot1, SlotMigrationStateSuccess) + waitForMigrateState(t, rdb0, testSlot, SlotMigrationStateSuccess) require.ErrorContains(t, rdb0.RPush(ctx, srcListName, "element1000").Err(), "MOVED") require.Equal(t, int64(srcLen), rdb1.LLen(ctx, srcListName).Val()) diff --git a/tests/gocase/unit/debug/debug_test.go b/tests/gocase/unit/debug/debug_test.go index 416e4a7d555..c15fd33dc2f 100644 --- a/tests/gocase/unit/debug/debug_test.go +++ b/tests/gocase/unit/debug/debug_test.go @@ -119,3 +119,27 @@ func TestDebugProtocolV3(t *testing.T) { require.EqualValues(t, false, val) }) } + +func TestDebugDBSizeLimit(t *testing.T) { + srv := util.StartServer(t, map[string]string{}) + defer srv.Close() + + ctx := context.Background() + rdb := srv.NewClient() + defer func() { require.NoError(t, rdb.Close()) }() + + t.Run("debug ignore dbsize check", func(t *testing.T) { + r := rdb.Do(ctx, "SET", "k1", "v1") + require.NoError(t, r.Err()) + + r = rdb.Do(ctx, "DEBUG", "DBSIZE-LIMIT", "1") + require.NoError(t, r.Err()) + + r = rdb.Do(ctx, "SET", "k2", "v2") + require.Error(t, r.Err()) + util.ErrorRegexp(t, r.Err(), "ERR.*not allowed.*") + + r = rdb.Do(ctx, "DEL", "k1") + require.NoError(t, r.Err()) + }) +} diff --git a/tests/gocase/unit/keyspace/keyspace_test.go b/tests/gocase/unit/keyspace/keyspace_test.go index d60fb71f632..704d4475a5a 100644 --- a/tests/gocase/unit/keyspace/keyspace_test.go +++ b/tests/gocase/unit/keyspace/keyspace_test.go @@ -197,7 +197,7 @@ func TestKeyspace(t *testing.T) { }) t.Run("Type a expired key", func(t *testing.T) { - expireTime := time.Second + expireTime := 2 * time.Second key := "foo" require.NoError(t, rdb.Del(ctx, key).Err()) require.Equal(t, "OK", rdb.SetEx(ctx, key, "bar", expireTime).Val()) diff --git a/tests/gocase/unit/protocol/protocol_test.go b/tests/gocase/unit/protocol/protocol_test.go index 61db7cf1bf7..39914a33a94 100644 --- a/tests/gocase/unit/protocol/protocol_test.go +++ b/tests/gocase/unit/protocol/protocol_test.go @@ -151,16 +151,18 @@ func TestProtocolRESP2(t *testing.T) { t.Run("debug protocol string", func(t *testing.T) { types := map[string][]string{ - "string": {"$11", "Hello World"}, - "integer": {":12345"}, - "double": {"$5", "3.141"}, 
- "array": {"*3", ":0", ":1", ":2"}, - "set": {"*3", ":0", ":1", ":2"}, - "map": {"*6", ":0", ":0", ":1", ":1", ":2", ":0"}, - "bignum": {"$37", "1234567999999999999999999999999999999"}, - "true": {":1"}, - "false": {":0"}, - "null": {"$-1"}, + "string": {"$11", "Hello World"}, + "integer": {":12345"}, + "double": {"$5", "3.141"}, + "array": {"*3", ":0", ":1", ":2"}, + "set": {"*3", ":0", ":1", ":2"}, + "map": {"*6", ":0", ":0", ":1", ":1", ":2", ":0"}, + "bignum": {"$37", "1234567999999999999999999999999999999"}, + "true": {":1"}, + "false": {":0"}, + "null": {"$-1"}, + "attrib": {"|1", "$14", "key-popularity", "*2", "$7", "key:123", ":90"}, + "verbatim": {"$15", "verbatim string"}, } for typ, expected := range types { args := []string{"DEBUG", "PROTOCOL", typ} @@ -207,16 +209,18 @@ func TestProtocolRESP3(t *testing.T) { } types := map[string][]string{ - "string": {"$11", "Hello World"}, - "integer": {":12345"}, - "double": {",3.141"}, - "array": {"*3", ":0", ":1", ":2"}, - "set": {"~3", ":0", ":1", ":2"}, - "map": {"%3", ":0", "#f", ":1", "#t", ":2", "#f"}, - "bignum": {"(1234567999999999999999999999999999999"}, - "true": {"#t"}, - "false": {"#f"}, - "null": {"_"}, + "string": {"$11", "Hello World"}, + "integer": {":12345"}, + "double": {",3.141"}, + "array": {"*3", ":0", ":1", ":2"}, + "set": {"~3", ":0", ":1", ":2"}, + "map": {"%3", ":0", "#f", ":1", "#t", ":2", "#f"}, + "bignum": {"(1234567999999999999999999999999999999"}, + "true": {"#t"}, + "false": {"#f"}, + "null": {"_"}, + "attrib": {"|1", "$14", "key-popularity", "*2", "$7", "key:123", ":90"}, + "verbatim": {"=19", "txt:verbatim string"}, } for typ, expected := range types { args := []string{"DEBUG", "PROTOCOL", typ} diff --git a/tests/gocase/unit/type/bitmap/bitmap_test.go b/tests/gocase/unit/type/bitmap/bitmap_test.go index a8ee50d048d..508f52dd82b 100644 --- a/tests/gocase/unit/type/bitmap/bitmap_test.go +++ b/tests/gocase/unit/type/bitmap/bitmap_test.go @@ -205,6 +205,60 @@ func TestBitmap(t *testing.T) { require.EqualValues(t, maxOffset, cmd.Val()) }) + t.Run("BITCOUNT BIT/BYTE option check(type bitmap bitmap_string)", func(t *testing.T) { + require.NoError(t, rdb.Del(ctx, "foo").Err()) + require.NoError(t, rdb.Do(ctx, "SET", "foo", "hello").Err()) + cmd := rdb.Do(ctx, "BITCOUNT", "foo", 0, -1, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 21, cmd.Val()) + require.NoError(t, rdb.Do(ctx, "SETBIT", "foo", 1024*8+2, 1).Err()) + require.NoError(t, rdb.Do(ctx, "SETBIT", "foo", 2*1024*8+1, 1).Err()) + cmd = rdb.Do(ctx, "BITCOUNT", "foo", 0, -1, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 23, cmd.Val()) + cmd = rdb.Do(ctx, "BITCOUNT", "foo", 0, 1024*8+2, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 22, cmd.Val()) + cmd = rdb.Do(ctx, "BITCOUNT", "foo", 40, 1024*8+2, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 1, cmd.Val()) + cmd = rdb.Do(ctx, "BITCOUNT", "foo", 0, 0, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 0, cmd.Val()) + cmd = rdb.Do(ctx, "BITCOUNT", "foo", 1024*8+2, 2*1024*8+1, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 2, cmd.Val()) + cmd = rdb.Do(ctx, "BITCOUNT", "foo", -1, -1, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 0, cmd.Val()) + require.NoError(t, rdb.Del(ctx, "foo").Err()) + + require.NoError(t, rdb.Del(ctx, "bar").Err()) + require.NoError(t, rdb.Do(ctx, "SETBIT", "bar", 0, 1).Err()) + require.NoError(t, rdb.Do(ctx, "SETBIT", "bar", 100, 1).Err()) + require.NoError(t, rdb.Do(ctx, 
"SETBIT", "bar", 1024*8+2, 1).Err()) + require.NoError(t, rdb.Do(ctx, "SETBIT", "bar", 2*1024*8+1, 1).Err()) + cmd = rdb.Do(ctx, "BITCOUNT", "bar", 0, 0, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 1, cmd.Val()) + cmd = rdb.Do(ctx, "BITCOUNT", "bar", 0, 100, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 2, cmd.Val()) + cmd = rdb.Do(ctx, "BITCOUNT", "bar", 100, 1024*8+2, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 2, cmd.Val()) + cmd = rdb.Do(ctx, "BITCOUNT", "bar", 1024*8+2, 2*1024*8+2, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 2, cmd.Val()) + cmd = rdb.Do(ctx, "BITCOUNT", "bar", 0, -1, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 4, cmd.Val()) + cmd = rdb.Do(ctx, "BITCOUNT", "bar", -1, -1, "BIT") + require.NoError(t, cmd.Err()) + require.EqualValues(t, 0, cmd.Val()) + require.NoError(t, rdb.Del(ctx, "bar").Err()) + }) + t.Run("BITOP NOT (known string)", func(t *testing.T) { Set2SetBit(t, rdb, ctx, "s", []byte("\xaa\x00\xff\x55")) require.NoError(t, rdb.BitOpNot(ctx, "dest", "s").Err()) diff --git a/tests/gocase/unit/type/json/json_test.go b/tests/gocase/unit/type/json/json_test.go index a849d46d4d5..a1489f7acd4 100644 --- a/tests/gocase/unit/type/json/json_test.go +++ b/tests/gocase/unit/type/json/json_test.go @@ -39,27 +39,27 @@ func TestJson(t *testing.T) { t.Run("JSON.SET and JSON.GET basics", func(t *testing.T) { require.Error(t, rdb.Do(ctx, "JSON.SET", "a").Err()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", ` {"x":1, "y":2} `).Err()) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a").Val(), `{"x":1,"y":2}`) + EqualJSON(t, `{"x":1,"y":2}`, rdb.Do(ctx, "JSON.GET", "a").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$.y", `233`).Err()) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a").Val(), `{"x":1,"y":233}`) + EqualJSON(t, `{"x":1,"y":233}`, rdb.Do(ctx, "JSON.GET", "a").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `[[1], [2]]`).Err()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$[*][0]", "3").Err()) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a").Val(), `[[3],[3]]`) + EqualJSON(t, `[[3],[3]]`, rdb.Do(ctx, "JSON.GET", "a").Val()) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "$").Val(), `[[[3],[3]]]`) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "$[0]").Val(), `[[3]]`) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "$[0][0]").Val(), `[3]`) + EqualJSON(t, `[[[3],[3]]]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, `[[3]]`, rdb.Do(ctx, "JSON.GET", "a", "$[0]").Val()) + EqualJSON(t, `[3]`, rdb.Do(ctx, "JSON.GET", "a", "$[0][0]").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{"x":1,"y":{"x":{"y":2},"y":3}}`).Err()) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a").Val(), `{"x":1,"y":{"x":{"y":2},"y":3}}`) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "$").Val(), `[{"x":1,"y":{"x":{"y":2},"y":3}}]`) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "$..x").Val(), `[1,{"y":2}]`) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "$..x", "$..y").Val(), `{"$..x":[1,{"y":2}],"$..y":[{"x":{"y":2},"y":3},3,2]}`) + EqualJSON(t, `{"x":1,"y":{"x":{"y":2},"y":3}}`, rdb.Do(ctx, "JSON.GET", "a").Val()) + EqualJSON(t, `[{"x":1,"y":{"x":{"y":2},"y":3}}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, `[1,{"y":2}]`, rdb.Do(ctx, "JSON.GET", "a", "$..x").Val()) + EqualJSON(t, `{"$..x":[1,{"y":2}],"$..y":[{"x":{"y":2},"y":3},3,2]}`, rdb.Do(ctx, "JSON.GET", "a", "$..x", "$..y").Val()) require.Equal(t, rdb.Do(ctx, "JSON.GET", 
"no-such-key").Val(), nil) - require.Equal(t, rdb.Type(ctx, "a").Val(), "ReJSON-RL") + require.Equal(t, "ReJSON-RL", rdb.Type(ctx, "a").Val()) }) t.Run("JSON.DEL and JSON.FORGET basics", func(t *testing.T) { @@ -67,11 +67,11 @@ func TestJson(t *testing.T) { for _, command := range []string{"JSON.DEL", "JSON.FORGET"} { require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{"x": 1, "nested": {"x": 2, "y": 3}}`).Err()) require.EqualValues(t, 2, rdb.Do(ctx, command, "a", "$..x").Val()) - require.Equal(t, `[{"nested":{"y":3}}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, `[{"nested":{"y":3}}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{"x": 1, "nested": {"x": 2, "y": 3}}`).Err()) require.EqualValues(t, 1, rdb.Do(ctx, command, "a", "$.x").Val()) - require.Equal(t, `[{"nested":{"x":2,"y":3}}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, `[{"nested":{"x":2,"y":3}}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) require.EqualValues(t, 1, rdb.Do(ctx, command, "a", "$").Val()) require.EqualValues(t, 0, rdb.Do(ctx, command, "no-such-json-key", "$").Val()) @@ -80,11 +80,11 @@ func TestJson(t *testing.T) { t.Run("JSON.GET with options", func(t *testing.T) { require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", ` {"x":1, "y":2} `).Err()) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "INDENT", " ").Val(), `{ "x":1, "y":2}`) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "INDENT", " ", "SPACE", " ").Val(), `{ "x": 1, "y": 2}`) - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "NEWLINE", "\n").Val(), "{\n\"x\":1,\n\"y\":2\n}") - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "NEWLINE", "\n", "INDENT", " ", "SPACE", " ").Val(), "{\n \"x\": 1,\n \"y\": 2\n}") - require.Equal(t, rdb.Do(ctx, "JSON.GET", "a", "INDENT", " ", "$").Val(), `[ { "x":1, "y":2 }]`) + require.Equal(t, `{ "x":1, "y":2}`, rdb.Do(ctx, "JSON.GET", "a", "INDENT", " ").Val()) + require.Equal(t, `{ "x": 1, "y": 2}`, rdb.Do(ctx, "JSON.GET", "a", "INDENT", " ", "SPACE", " ").Val()) + require.Equal(t, "{\n\"x\":1,\n\"y\":2\n}", rdb.Do(ctx, "JSON.GET", "a", "NEWLINE", "\n").Val()) + require.Equal(t, "{\n \"x\": 1,\n \"y\": 2\n}", rdb.Do(ctx, "JSON.GET", "a", "NEWLINE", "\n", "INDENT", " ", "SPACE", " ").Val()) + require.Equal(t, `[ { "x":1, "y":2 }]`, rdb.Do(ctx, "JSON.GET", "a", "INDENT", " ", "$").Val()) }) t.Run("JSON storage format CBOR", func(t *testing.T) { @@ -94,8 +94,8 @@ func TestJson(t *testing.T) { require.NoError(t, rdb.Do(ctx, "CONFIG", "SET", "json-storage-format", "cbor").Err()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "b", "$", `{"x":1, "y":2}`).Err()) require.Equal(t, "cbor", rdb.Do(ctx, "JSON.INFO", "b").Val().([]interface{})[1]) - require.Equal(t, `{"x":1,"y":2}`, rdb.Do(ctx, "JSON.GET", "b").Val()) - require.Equal(t, `{"x":1,"y":2}`, rdb.Do(ctx, "JSON.GET", "a").Val()) + EqualJSON(t, `{"x":1,"y":2}`, rdb.Do(ctx, "JSON.GET", "b").Val()) + EqualJSON(t, `{"x":1,"y":2}`, rdb.Do(ctx, "JSON.GET", "a").Val()) }) t.Run("JSON.ARRAPPEND basics", func(t *testing.T) { @@ -112,19 +112,19 @@ func TestJson(t *testing.T) { require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", ` {"x":1, "y": {"x":[]} } `).Err()) require.Equal(t, []interface{}{int64(1), nil}, rdb.Do(ctx, "JSON.ARRAPPEND", "a", "$..x", `1`).Val()) - require.Equal(t, `[{"x":1,"y":{"x":[1]}}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, `[{"x":1,"y":{"x":[1]}}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", ` {"x":[], "y":[]} `).Err()) 
require.Equal(t, []interface{}{int64(1)}, rdb.Do(ctx, "JSON.ARRAPPEND", "a", "$.x", `1`).Val()) - require.Equal(t, `{"x":[1],"y":[]}`, rdb.Do(ctx, "JSON.GET", "a").Val()) + EqualJSON(t, `{"x":[1],"y":[]}`, rdb.Do(ctx, "JSON.GET", "a").Val()) require.Equal(t, []interface{}{int64(4)}, rdb.Do(ctx, "JSON.ARRAPPEND", "a", "$.x", `1`, `2`, `3`).Val()) require.Equal(t, []interface{}{int64(1)}, rdb.Do(ctx, "JSON.ARRAPPEND", "a", "$.y", ` {"x":[], "y":[]} `).Val()) - require.Equal(t, `[{"x":[1,1,2,3],"y":[{"x":[],"y":[]}]}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, `[{"x":[1,1,2,3],"y":[{"x":[],"y":[]}]}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) require.Equal(t, []interface{}{int64(2), int64(6)}, rdb.Do(ctx, "JSON.ARRAPPEND", "a", "$..x", `1`, `2`).Val()) - require.Equal(t, `[[1,2]]`, rdb.Do(ctx, "JSON.GET", "a", "$.y[0].x").Val()) - require.Equal(t, `[]`, rdb.Do(ctx, "JSON.GET", "a", "$.x.x").Val()) - require.Equal(t, `[{"x":[1,1,2,3,1,2],"y":[{"x":[1,2],"y":[]}]}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, `[[1,2]]`, rdb.Do(ctx, "JSON.GET", "a", "$.y[0].x").Val()) + EqualJSON(t, `[]`, rdb.Do(ctx, "JSON.GET", "a", "$.x.x").Val()) + EqualJSON(t, `[{"x":[1,1,2,3,1,2],"y":[{"x":[1,2],"y":[]}]}]`, rdb.Do(ctx, "JSON.GET", "a", "$").Val()) }) t.Run("JSON.TYPE basics", func(t *testing.T) { @@ -185,38 +185,38 @@ func TestJson(t *testing.T) { t.Run("Merge basics", func(t *testing.T) { require.NoError(t, rdb.Do(ctx, "JSON.SET", "key", "$", `{"a":2}`).Err()) require.NoError(t, rdb.Do(ctx, "JSON.MERGE", "key", "$.a", `3`).Err()) - require.Equal(t, `{"a":3}`, rdb.Do(ctx, "JSON.GET", "key").Val()) + EqualJSON(t, `{"a":3}`, rdb.Do(ctx, "JSON.GET", "key").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "key", "$", `{"a":2}`).Err()) require.NoError(t, rdb.Do(ctx, "JSON.MERGE", "key", "$.a", `null`).Err()) - require.Equal(t, `{}`, rdb.Do(ctx, "JSON.GET", "key").Val()) + EqualJSON(t, `{}`, rdb.Do(ctx, "JSON.GET", "key").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "key", "$", `{"a":[2,4,6,8]}`).Err()) require.NoError(t, rdb.Do(ctx, "JSON.MERGE", "key", "$.a", `[10,12]`).Err()) - require.Equal(t, `{"a":[10,12]}`, rdb.Do(ctx, "JSON.GET", "key").Val()) + EqualJSON(t, `{"a":[10,12]}`, rdb.Do(ctx, "JSON.GET", "key").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "key", "$", `{"f1": {"a":1}, "f2":{"a":2}}`).Err()) - require.Equal(t, `{"f1":{"a":1},"f2":{"a":2}}`, rdb.Do(ctx, "JSON.GET", "key").Val()) + EqualJSON(t, `{"f1":{"a":1},"f2":{"a":2}}`, rdb.Do(ctx, "JSON.GET", "key").Val()) require.NoError(t, rdb.Do(ctx, "JSON.MERGE", "key", "$", `{"f1": null, "f2":{"a":3, "b":4}, "f3":[2,4,6]}`).Err()) - require.Equal(t, `{"f2":{"a":3,"b":4},"f3":[2,4,6]}`, rdb.Do(ctx, "JSON.GET", "key").Val()) + EqualJSON(t, `{"f2":{"a":3,"b":4},"f3":[2,4,6]}`, rdb.Do(ctx, "JSON.GET", "key").Val()) }) t.Run("Clear JSON values", func(t *testing.T) { require.NoError(t, rdb.Do(ctx, "JSON.SET", "bb", "$", `{"obj":{"a":1, "b":2}, "arr":[1,2,3], "str": "foo", "bool": true, "int": 42, "float": 3.14}`).Err()) require.NoError(t, rdb.Do(ctx, "JSON.CLEAR", "bb", "$").Err()) - require.Equal(t, `{}`, rdb.Do(ctx, "JSON.GET", "bb").Val()) + EqualJSON(t, `{}`, rdb.Do(ctx, "JSON.GET", "bb").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "bb", "$", `{"obj":{"a":1, "b":2}, "arr":[1,2,3], "str": "foo", "bool": true, "int": 42, "float": 3.14}`).Err()) require.NoError(t, rdb.Do(ctx, "JSON.CLEAR", "bb", "$.obj").Err()) - require.Equal(t, `{"arr":[1,2,3],"bool":true,"float":3.14,"int":42,"obj":{},"str":"foo"}`, rdb.Do(ctx, 
"JSON.GET", "bb").Val()) + EqualJSON(t, `{"arr":[1,2,3],"bool":true,"float":3.14,"int":42,"obj":{},"str":"foo"}`, rdb.Do(ctx, "JSON.GET", "bb").Val()) require.NoError(t, rdb.Do(ctx, "JSON.CLEAR", "bb", "$.arr").Err()) - require.Equal(t, `{"arr":[],"bool":true,"float":3.14,"int":42,"obj":{},"str":"foo"}`, rdb.Do(ctx, "JSON.GET", "bb").Val()) + EqualJSON(t, `{"arr":[],"bool":true,"float":3.14,"int":42,"obj":{},"str":"foo"}`, rdb.Do(ctx, "JSON.GET", "bb").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "bb", "$", `{"obj":{"a":1, "b":2}, "arr":[1,2,3], "str": "foo", "bool": true, "int": 42, "float": 3.14}`).Err()) require.NoError(t, rdb.Do(ctx, "JSON.CLEAR", "bb", "$.*").Err()) - require.Equal(t, `{"arr":[],"bool":true,"float":0,"int":0,"obj":{},"str":"foo"}`, rdb.Do(ctx, "JSON.GET", "bb").Val()) + EqualJSON(t, `{"arr":[],"bool":true,"float":0,"int":0,"obj":{},"str":"foo"}`, rdb.Do(ctx, "JSON.GET", "bb").Val()) _, err := rdb.Do(ctx, "JSON.CLEAR", "bb", "$.some").Result() require.NoError(t, err) @@ -276,28 +276,28 @@ func TestJson(t *testing.T) { // json path has one array require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{"a":[1,2,3], "b":{"a":[4,5,6,7],"c":2},"c":[1,2,3,4],"e":[6,7,8],"f":{"a":[10,11,12,13,14], "g":2}}`).Err()) require.EqualValues(t, []interface{}{int64(4)}, rdb.Do(ctx, arrInsertCmd, "a", "$.e", 1, 90).Val()) - require.Equal(t, "[{\"a\":[1,2,3],\"b\":{\"a\":[4,5,6,7],\"c\":2},\"c\":[1,2,3,4],\"e\":[6,90,7,8],\"f\":{\"a\":[10,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, "[{\"a\":[1,2,3],\"b\":{\"a\":[4,5,6,7],\"c\":2},\"c\":[1,2,3,4],\"e\":[6,90,7,8],\"f\":{\"a\":[10,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) // insert many value require.EqualValues(t, []interface{}{int64(8)}, rdb.Do(ctx, arrInsertCmd, "a", "$.e", 2, 80, 81, 82, 83).Val()) - require.Equal(t, "[{\"a\":[1,2,3],\"b\":{\"a\":[4,5,6,7],\"c\":2},\"c\":[1,2,3,4],\"e\":[6,90,80,81,82,83,7,8],\"f\":{\"a\":[10,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, "[{\"a\":[1,2,3],\"b\":{\"a\":[4,5,6,7],\"c\":2},\"c\":[1,2,3,4],\"e\":[6,90,80,81,82,83,7,8],\"f\":{\"a\":[10,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) // json path has many array require.EqualValues(t, []interface{}{int64(6), int64(5), int64(4)}, rdb.Do(ctx, arrInsertCmd, "a", "$..a", 1, 91).Val()) - require.Equal(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[1,2,3,4],\"e\":[6,90,80,81,82,83,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[1,2,3,4],\"e\":[6,90,80,81,82,83,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) // json path has many array and one is not array require.EqualValues(t, []interface{}{int64(5), nil}, rdb.Do(ctx, arrInsertCmd, "a", "$..c", 0, 92).Val()) - require.Equal(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[92,1,2,3,4],\"e\":[6,90,80,81,82,83,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[92,1,2,3,4],\"e\":[6,90,80,81,82,83,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) // index = 0 require.EqualValues(t, []interface{}{int64(9)}, rdb.Do(ctx, arrInsertCmd, "a", "$.e", 0, 93).Val()) - require.Equal(t, 
"[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[92,1,2,3,4],\"e\":[93,6,90,80,81,82,83,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[92,1,2,3,4],\"e\":[93,6,90,80,81,82,83,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) // index < 0 require.EqualValues(t, []interface{}{int64(10)}, rdb.Do(ctx, arrInsertCmd, "a", "$.e", -2, 94).Val()) - require.Equal(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[92,1,2,3,4],\"e\":[93,6,90,80,81,82,83,94,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[92,1,2,3,4],\"e\":[93,6,90,80,81,82,83,94,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) // index >= len require.EqualValues(t, []interface{}{nil}, rdb.Do(ctx, arrInsertCmd, "a", "$.e", 15, 95).Val()) - require.Equal(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[92,1,2,3,4],\"e\":[93,6,90,80,81,82,83,94,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[92,1,2,3,4],\"e\":[93,6,90,80,81,82,83,94,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) // index + len < 0 require.EqualValues(t, []interface{}{nil}, rdb.Do(ctx, arrInsertCmd, "a", "$", -15, 96).Val()) - require.Equal(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[92,1,2,3,4],\"e\":[93,6,90,80,81,82,83,94,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) + EqualJSON(t, "[{\"a\":[1,91,2,3],\"b\":{\"a\":[4,91,5,6,7],\"c\":2},\"c\":[92,1,2,3,4],\"e\":[93,6,90,80,81,82,83,94,7,8],\"f\":{\"a\":[10,91,11,12,13,14],\"g\":2}}]", rdb.Do(ctx, "JSON.GET", "a", "$").Val()) }) @@ -441,27 +441,27 @@ func TestJson(t *testing.T) { require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{"bool":true}`).Err()) require.EqualValues(t, []interface{}{int64(0)}, rdb.Do(ctx, "JSON.TOGGLE", "a", "$.bool").Val()) - require.Equal(t, `{"bool":false}`, rdb.Do(ctx, "JSON.GET", "a").Val()) + EqualJSON(t, `{"bool":false}`, rdb.Do(ctx, "JSON.GET", "a").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{"bool":true,"bools":{"bool":true}}`).Err()) require.EqualValues(t, []interface{}{int64(0)}, rdb.Do(ctx, "JSON.TOGGLE", "a", "$.bool").Val()) - require.Equal(t, `{"bool":false,"bools":{"bool":true}}`, rdb.Do(ctx, "JSON.GET", "a").Val()) + EqualJSON(t, `{"bool":false,"bools":{"bool":true}}`, rdb.Do(ctx, "JSON.GET", "a").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{"bool":true,"bools":{"bool":true}}`).Err()) require.EqualValues(t, []interface{}{int64(0), int64(0)}, rdb.Do(ctx, "JSON.TOGGLE", "a", "$..bool").Val()) - require.Equal(t, `{"bool":false,"bools":{"bool":false}}`, rdb.Do(ctx, "JSON.GET", "a").Val()) + EqualJSON(t, `{"bool":false,"bools":{"bool":false}}`, rdb.Do(ctx, "JSON.GET", "a").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{"bool":false,"bools":{"bool":true}}`).Err()) require.EqualValues(t, []interface{}{int64(1), int64(0)}, rdb.Do(ctx, "JSON.TOGGLE", "a", "$..bool").Val()) - require.Equal(t, `{"bool":true,"bools":{"bool":false}}`, rdb.Do(ctx, "JSON.GET", "a").Val()) + EqualJSON(t, `{"bool":true,"bools":{"bool":false}}`, rdb.Do(ctx, "JSON.GET", 
"a").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{"incorrectbool":99,"bools":{"bool":true},"bool":{"bool":false}}`).Err()) require.EqualValues(t, []interface{}{nil, int64(1), int64(0)}, rdb.Do(ctx, "JSON.TOGGLE", "a", "$..bool").Val()) - require.Equal(t, `{"bool":{"bool":true},"bools":{"bool":false},"incorrectbool":99}`, rdb.Do(ctx, "JSON.GET", "a").Val()) + EqualJSON(t, `{"bool":{"bool":true},"bools":{"bool":false},"incorrectbool":99}`, rdb.Do(ctx, "JSON.GET", "a").Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `[99,true,99]`).Err()) require.EqualValues(t, []interface{}{nil, int64(0), nil}, rdb.Do(ctx, "JSON.TOGGLE", "a", "$..*").Val()) - require.Equal(t, `[99,false,99]`, rdb.Do(ctx, "JSON.GET", "a").Val()) + EqualJSON(t, `[99,false,99]`, rdb.Do(ctx, "JSON.GET", "a").Val()) }) t.Run("JSON.ARRINDEX basics", func(t *testing.T) { @@ -505,23 +505,23 @@ func TestJson(t *testing.T) { t.Run("JSON.NUMOP basics", func(t *testing.T) { require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{ "foo": 0, "bar": "baz" }`).Err()) - require.Equal(t, `[1]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.foo", 1).Val()) - require.Equal(t, `[1]`, rdb.Do(ctx, "JSON.GET", "a", "$.foo").Val()) - require.Equal(t, `[3]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.foo", 2).Val()) - require.Equal(t, `[3.5]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.foo", 0.5).Val()) + EqualJSON(t, `[1]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.foo", 1).Val()) + EqualJSON(t, `[1]`, rdb.Do(ctx, "JSON.GET", "a", "$.foo").Val()) + EqualJSON(t, `[3]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.foo", 2).Val()) + EqualJSON(t, `[3.5]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.foo", 0.5).Val()) // wrong type require.Equal(t, `[null]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.bar", 1).Val()) - require.Equal(t, `[]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.fuzz", 1).Val()) + EqualJSON(t, `[]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.fuzz", 1).Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `0`).Err()) - require.Equal(t, `[1]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$", 1).Val()) - require.Equal(t, `[2.5]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$", 1.5).Val()) + EqualJSON(t, `[1]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$", 1).Val()) + EqualJSON(t, `[2.5]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$", 1.5).Val()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "a", "$", `{"foo":0,"bar":42}`).Err()) - require.Equal(t, `[1]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.foo", 1).Val()) - require.Equal(t, `[84]`, rdb.Do(ctx, "JSON.NUMMULTBY", "a", "$.bar", 2).Val()) + EqualJSON(t, `[1]`, rdb.Do(ctx, "JSON.NUMINCRBY", "a", "$.foo", 1).Val()) + EqualJSON(t, `[84]`, rdb.Do(ctx, "JSON.NUMMULTBY", "a", "$.bar", 2).Val()) // overflow case require.NoError(t, rdb.Do(ctx, "JSON.SET", "big_num", "$", "1.6350000000001313e+308").Err()) @@ -605,7 +605,6 @@ func TestJson(t *testing.T) { require.Equal(t, 2, len(vals)) require.EqualValues(t, "[null]", vals[0]) require.EqualValues(t, "[null]", vals[1]) - vals, err = rdb.Do(ctx, "JSON.MGET", "a0", "a1", "$.nonexists").Slice() require.NoError(t, err) require.Equal(t, 2, len(vals)) @@ -614,3 +613,7 @@ func TestJson(t *testing.T) { }) } + +func EqualJSON(t *testing.T, expected string, actual interface{}) { + require.JSONEq(t, expected, actual.(string)) +} diff --git a/tests/gocase/unit/type/stream/stream_test.go b/tests/gocase/unit/type/stream/stream_test.go index 7dee10b6b3e..7bbce02b6c2 100644 --- a/tests/gocase/unit/type/stream/stream_test.go +++ b/tests/gocase/unit/type/stream/stream_test.go @@ -985,6 +985,21 @@ func 
TestStreamOffset(t *testing.T) {
 		r1 = rdb.XInfoConsumers(ctx, streamName, group2).Val()
 		require.Equal(t, consumer3, r1[0].Name)
 	})
+
+	t.Run("XREAD After XGroupCreate and XGroupCreateConsumer, for issue #2109", func(t *testing.T) {
+		streamName := "test-stream"
+		group := "group"
+		require.NoError(t, rdb.XAdd(ctx, &redis.XAddArgs{
+			Stream: streamName,
+			ID:     "*",
+			Values: []string{"data1", "b"},
+		}).Err())
+		require.NoError(t, rdb.XGroupCreate(ctx, streamName, group, "0").Err())
+		require.NoError(t, rdb.XGroupCreateConsumer(ctx, streamName, group, "consumer").Err())
+		require.NoError(t, rdb.XRead(ctx, &redis.XReadArgs{
+			Streams: []string{streamName, "0"},
+		}).Err())
+	})
 }
 
 func parseStreamEntryID(id string) (ts int64, seqNum int64) {
diff --git a/tests/gocase/unit/type/strings/strings_test.go b/tests/gocase/unit/type/strings/strings_test.go
index fc799fc5cd8..e3474b05e85 100644
--- a/tests/gocase/unit/type/strings/strings_test.go
+++ b/tests/gocase/unit/type/strings/strings_test.go
@@ -388,6 +388,12 @@ func TestString(t *testing.T) {
 		require.NoError(t, rdb.SetBit(ctx, "mykey", maxOffset, 1).Err())
 		require.EqualValues(t, 1, rdb.GetBit(ctx, "mykey", maxOffset).Val())
 		require.EqualValues(t, 1, rdb.BitCount(ctx, "mykey", &redis.BitCount{Start: 0, End: maxOffset / 8}).Val())
+		// Negative offsets count from the end: the last byte holds the single set bit.
+		require.EqualValues(t, 1, rdb.BitCount(ctx, "mykey", &redis.BitCount{Start: -1, End: -1}).Val())
+		// The range covering the last 100 bytes still includes that set bit.
+		require.EqualValues(t, 1, rdb.BitCount(ctx, "mykey", &redis.BitCount{Start: -100, End: -1}).Val())
+		// A single byte 100 bytes from the end holds no set bits.
+		require.EqualValues(t, 0, rdb.BitCount(ctx, "mykey", &redis.BitCount{Start: -100, End: -100}).Val())
 		require.EqualValues(t, maxOffset, rdb.BitPos(ctx, "mykey", 1).Val())
 	})
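
A note on the range semantics these new BITCOUNT assertions exercise: Start and End may be negative, in which case they count backwards from the end of the value (in bytes by default, in bits with the BIT option), and out-of-range offsets are clamped before counting. The following is a minimal, self-contained Go sketch of those rules under assumed names (clampRange, bitCount are illustrative helpers, not Kvrocks' actual implementation, which operates on RocksDB-backed bitmaps rather than a plain byte slice):

package main

import (
	"fmt"
	"math/bits"
)

// clampRange resolves possibly-negative start/end offsets against n units
// (bytes or bits): negative offsets count from the end, and the result is
// clamped to [0, n-1]. ok is false when the resolved range is empty.
func clampRange(start, end, n int64) (s, e int64, ok bool) {
	if start < 0 {
		start += n
	}
	if end < 0 {
		end += n
	}
	if start < 0 {
		start = 0
	}
	if end < 0 {
		end = 0
	}
	if end >= n {
		end = n - 1
	}
	if n == 0 || start > end {
		return 0, 0, false
	}
	return start, end, true
}

// bitCount counts set bits in value over the resolved [start, end] range,
// interpreting offsets as bit indexes when isBit is true, else byte indexes.
func bitCount(value []byte, start, end int64, isBit bool) int64 {
	n := int64(len(value))
	if isBit {
		n *= 8
	}
	start, end, ok := clampRange(start, end, n)
	if !ok {
		return 0
	}
	var count int64
	if !isBit {
		for _, b := range value[start : end+1] {
			count += int64(bits.OnesCount8(b))
		}
		return count
	}
	for i := start; i <= end; i++ {
		// Bit 0 is the most significant bit of byte 0, matching SETBIT.
		if value[i/8]&(1<<uint(7-i%8)) != 0 {
			count++
		}
	}
	return count
}

func main() {
	// "hello" holds 21 set bits; 0..-1 with BIT covers every bit.
	fmt.Println(bitCount([]byte("hello"), 0, -1, true)) // 21

	// Only the very last bit is set: the last byte counts 1, while a
	// single byte 100 bytes from the end counts 0.
	buf := make([]byte, 200)
	buf[len(buf)-1] = 0x01
	fmt.Println(bitCount(buf, -1, -1, false))     // 1
	fmt.Println(bitCount(buf, -100, -100, false)) // 0
}

Run as a plain Go program this prints 21, 1, and 0 — the same values the new bitmap and strings assertions expect above.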