apache · PragmaTwice · Oct 29, 2024 · Oct 16, 2024 · Oct 16, 2024 · Oct 17, 2024
diff --git a/.github/config/typos.toml b/.github/config/typos.toml
@@ -20,6 +20,11 @@ extend-exclude = [
     ".git/",
     "src/vendor/",
     "tests/gocase/util/slot.go",
+
+    # Uses short strings for testing glob matching
+    "tests/cppunit/glob_util_test.cc",
+    "tests/gocase/unit/keyspace/keyspace_test.go",
+    "tests/gocase/unit/keyspace/scan_test.go",
 ]
 ignore-hidden = false
 

diff --git a/src/commands/cmd_server.cc b/src/commands/cmd_server.cc
@@ -21,8 +21,10 @@
 #include "command_parser.h"
 #include "commander.h"
 #include "commands/scan_base.h"
+#include "common/glob.h"
 #include "common/io_util.h"
 #include "common/rdb_stream.h"
+#include "common/time_util.h"
 #include "config/config.h"
 #include "error_constants.h"
 #include "server/redis_connection.h"
@@ -31,7 +33,6 @@
 #include "stats/disk_stats.h"
 #include "storage/rdb/rdb.h"
 #include "string_util.h"
-#include "time_util.h"
 
 namespace redis {
 
@@ -114,15 +115,12 @@ class CommandNamespace : public Commander {
 class CommandKeys : public Commander {
  public:
   Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override {
-    const std::string &prefix = args_[1];
+    const std::string &glob_pattern = args_[1];
     std::vector<std::string> keys;
     redis::Database redis(srv->storage, conn->GetNamespace());
 
-    if (prefix.empty() || prefix.find('*') != prefix.size() - 1) {
-      return {Status::RedisExecErr, "only keys prefix match was supported"};
-    }
-
-    const rocksdb::Status s = redis.Keys(ctx, prefix.substr(0, prefix.size() - 1), &keys);
+    const auto [prefix, suffix_glob] = util::SplitGlob(glob_pattern);
+    const rocksdb::Status s = redis.Keys(ctx, prefix, suffix_glob, &keys);
     if (!s.ok()) {
       return {Status::RedisExecErr, s.ToString()};
     }
@@ -848,7 +846,7 @@ class CommandScan : public CommandScanBase {
     std::vector<std::string> keys;
     std::string end_key;
 
-    auto s = redis_db.Scan(ctx, key_name, limit_, prefix_, &keys, &end_key, type_);
+    const auto s = redis_db.Scan(ctx, key_name, limit_, prefix_, suffix_glob_, &keys, &end_key, type_);
     if (!s.ok()) {
       return {Status::RedisExecErr, s.ToString()};
     }

diff --git a/src/commands/scan_base.h b/src/commands/scan_base.h
@@ -23,6 +23,7 @@
 #include "commander.h"
 #include "commands/command_parser.h"
 #include "error_constants.h"
+#include "glob.h"
 #include "parse_util.h"
 #include "server/server.h"
 
@@ -44,14 +45,8 @@ class CommandScanBase : public Commander {
   Status ParseAdditionalFlags(Parser &parser) {
     while (parser.Good()) {
       if (parser.EatEqICase("match")) {
-        prefix_ = GET_OR_RET(parser.TakeStr());
-        // The match pattern should contain exactly one '*' at the end; remove the * to
-        // get the prefix to match.
-        if (!prefix_.empty() && prefix_.find('*') == prefix_.size() - 1) {
-          prefix_.pop_back();
-        } else {
-          return {Status::RedisParseErr, "currently only key prefix matching is supported"};
-        }
+        const std::string glob_pattern = GET_OR_RET(parser.TakeStr());
+        std::tie(prefix_, suffix_glob_) = util::SplitGlob(glob_pattern);
       } else if (parser.EatEqICase("count")) {
         limit_ = GET_OR_RET(parser.TakeInt());
         if (limit_ <= 0) {
@@ -100,6 +95,7 @@ class CommandScanBase : public Commander {
  protected:
   std::string cursor_;
   std::string prefix_;
+  std::string suffix_glob_;
   int limit_ = 20;
   RedisType type_ = kRedisNone;
 };

diff --git a/src/common/glob.h b/src/common/glob.h
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <cctype>
+#include <string>
+#include <string_view>
+
+namespace util {
+
+constexpr inline bool GlobMatchImpl(std::string_view pattern, std::string_view string, bool ignore_case,
+                                    bool *skip_longer_matches, size_t recursion_depth = 0) {
+  // If we want to ignore case, this is equivalent to converting both the pattern and the string to lowercase
+  const auto canonicalize = [ignore_case](unsigned char c) -> unsigned char {
+    return ignore_case ? static_cast<unsigned char>(std::tolower(c)) : c;
+  };
+
+  if (recursion_depth > 1000) return false;
+
+  while (!pattern.empty() && !string.empty()) {
+    switch (pattern[0]) {
+      case '*':
+        // Optimization: collapse multiple * into one
+        while (pattern.size() >= 2 && pattern[1] == '*') {
+          pattern.remove_prefix(1);
+        }
+        // Optimization: If the '*' is the last character in the pattern, it can match anything
+        if (pattern.length() == 1) return true;
+        while (!string.empty()) {
+          if (GlobMatchImpl(pattern.substr(1), string, ignore_case, skip_longer_matches, recursion_depth + 1))
+            return true;
+          if (*skip_longer_matches) return false;
+          string.remove_prefix(1);
+        }
+        // There was no match for the rest of the pattern starting
+        // from anywhere in the rest of the string. If there were
+        // any '*' earlier in the pattern, we can terminate the
+        // search early without trying to match them to longer
+        // substrings. This is because a longer match for the
+        // earlier part of the pattern would require the rest of the
+        // pattern to match starting later in the string, and we
+        // have just determined that there is no match for the rest
+        // of the pattern starting from anywhere in the current
+        // string.
+        *skip_longer_matches = true;
+        return false;
+      case '?':
+        if (string.empty()) return false;
+        string.remove_prefix(1);
+        break;
+      case '[': {
+        pattern.remove_prefix(1);
+        const bool invert = pattern[0] == '^';
+        if (invert) pattern.remove_prefix(1);
+
+        bool match = false;
+        while (true) {
+          if (pattern.empty()) {
+            // unterminated [ group: reject invalid pattern
+            return false;
+          } else if (pattern[0] == ']') {
+            break;
+          } else if (pattern.length() >= 2 && pattern[0] == '\\') {
+            pattern.remove_prefix(1);
+            if (pattern[0] == string[0]) match = true;
+          } else if (pattern.length() >= 3 && pattern[1] == '-') {
+            unsigned char start = canonicalize(pattern[0]);
+            unsigned char end = canonicalize(pattern[2]);
+            if (start > end) std::swap(start, end);
+            const int c = canonicalize(string[0]);
+            pattern.remove_prefix(2);
+
+            if (c >= start && c <= end) match = true;
+          } else if (canonicalize(pattern[0]) == canonicalize(string[0])) {
+            match = true;
+          }
+          pattern.remove_prefix(1);
+        }
+        if (invert) match = !match;
+        if (!match) return false;
+        string.remove_prefix(1);
+        break;
+      }
+      case '\\':
+        if (pattern.length() >= 2) {
+          pattern.remove_prefix(1);
+        }
+        [[fallthrough]];
+      default:
+        // Just a normal character
+        if (!ignore_case) {
+          if (pattern[0] != string[0]) return false;
+        } else {
+          if (std::tolower((int)pattern[0]) != std::tolower((int)string[0])) return false;
+        }
+        string.remove_prefix(1);
+        break;
+    }
+    pattern.remove_prefix(1);
+  }
+
+  // Now that either the pattern is empty or the string is empty, this is a match iff
+  // the pattern consists only of '*', and the string is empty.
+  return string.empty() && std::all_of(pattern.begin(), pattern.end(), [](char c) { return c == '*'; });
+}
+
+// Given a glob [pattern] and a string [string], return true iff the string matches the glob.
+// If [ignore_case] is true, the match is case-insensitive.
+constexpr inline bool GlobMatches(std::string_view glob, std::string_view str, bool ignore_case = false) {
+  bool skip_longer_matches = false;
+  return GlobMatchImpl(glob, str, ignore_case, &skip_longer_matches);
+}
+
+// Split a glob pattern into a literal prefix and a suffix containing wildcards.
+// For example, if the user calls [KEYS bla*bla], this function will return {"bla", "*bla"}.
+// This allows the caller of this function to optimize this call by performing a
+// prefix-scan on "bla" and then filtering the results using the GlobMatches function.
+inline std::pair<std::string, std::string> SplitGlob(std::string_view glob) {
+  // Stores the prefix of the glob pattern, with backslashes removed
+  std::string prefix;
+  // Find the first un-escaped '*', '?' or '[' character in [glob]
+  for (size_t idx = 0; idx < glob.size(); ++idx) {
+    if (glob[idx] == '*' || glob[idx] == '?' || glob[idx] == '[') {
+      // Return a pair of views: the part of the glob before the wildcard, and the part after
+      return {prefix, std::string(glob.substr(idx))};
+    } else if (glob[idx] == '\\') {
+      // Skip checking whether the next character is a special character
+      ++idx;
+      // Append the escaped special character to the prefix
+      prefix.push_back(glob[idx]);
+    } else {
+      prefix.push_back(glob[idx]);
+    }
+  }
+  // No wildcard found, return the entire string (without the backslashes) as the prefix
+  return {prefix, ""};
+}
+
+}  // namespace util
diff --git a/src/storage/redis_db.cc b/src/storage/redis_db.cc
@@ -21,16 +21,15 @@
 #include "redis_db.h"
 
 #include <ctime>
-#include <map>
 #include <utility>
 
 #include "cluster/redis_slot.h"
 #include "common/scope_exit.h"
 #include "db_util.h"
+#include "glob.h"
 #include "parse_util.h"
 #include "rocksdb/iterator.h"
 #include "rocksdb/status.h"
-#include "server/server.h"
 #include "storage/iterator.h"
 #include "storage/redis_metadata.h"
 #include "storage/storage.h"
@@ -249,11 +248,11 @@ rocksdb::Status Database::GetExpireTime(engine::Context &ctx, const Slice &user_
 }
 
 rocksdb::Status Database::GetKeyNumStats(engine::Context &ctx, const std::string &prefix, KeyNumStats *stats) {
-  return Keys(ctx, prefix, nullptr, stats);
+  return Keys(ctx, prefix, "*", nullptr, stats);
 }
 
-rocksdb::Status Database::Keys(engine::Context &ctx, const std::string &prefix, std::vector<std::string> *keys,
-                               KeyNumStats *stats) {
+rocksdb::Status Database::Keys(engine::Context &ctx, const std::string &prefix, const std::string &suffix_glob,
+                               std::vector<std::string> *keys, KeyNumStats *stats) {
   uint16_t slot_id = 0;
   std::string ns_prefix;
   if (namespace_ != kDefaultNamespace || keys != nullptr) {
@@ -277,6 +276,10 @@ rocksdb::Status Database::Keys(engine::Context &ctx, const std::string &prefix,
       if (!ns_prefix.empty() && !iter->key().starts_with(ns_prefix)) {
         break;
       }
+      auto [_, user_key] = ExtractNamespaceKey(iter->key(), storage_->IsSlotIdEncoded());
+      if (!util::GlobMatches(suffix_glob, user_key.ToString().substr(prefix.size()))) {
+        continue;
+      }
       Metadata metadata(kRedisNone, false);
       auto s = metadata.Decode(iter->value());
       if (!s.ok()) continue;
@@ -293,7 +296,6 @@ rocksdb::Status Database::Keys(engine::Context &ctx, const std::string &prefix,
         }
       }
       if (keys) {
-        auto [_, user_key] = ExtractNamespaceKey(iter->key(), storage_->IsSlotIdEncoded());
         keys->emplace_back(user_key.ToString());
       }
     }
@@ -319,8 +321,8 @@ rocksdb::Status Database::Keys(engine::Context &ctx, const std::string &prefix,
 }
 
 rocksdb::Status Database::Scan(engine::Context &ctx, const std::string &cursor, uint64_t limit,
-                               const std::string &prefix, std::vector<std::string> *keys, std::string *end_cursor,
-                               RedisType type) {
+                               const std::string &prefix, const std::string &suffix_glob,
+                               std::vector<std::string> *keys, std::string *end_cursor, RedisType type) {
   end_cursor->clear();
   uint64_t cnt = 0;
   uint16_t slot_start = 0;
@@ -366,6 +368,10 @@ rocksdb::Status Database::Scan(engine::Context &ctx, const std::string &cursor,
 
       if (metadata.Expired()) continue;
       std::tie(std::ignore, user_key) = ExtractNamespaceKey<std::string>(iter->key(), storage_->IsSlotIdEncoded());
+
+      if (!util::GlobMatches(suffix_glob, user_key.substr(prefix.size()))) {
+        continue;
+      }
       keys->emplace_back(user_key);
       cnt++;
     }
@@ -395,7 +401,7 @@ rocksdb::Status Database::Scan(engine::Context &ctx, const std::string &cursor,
         if (iter->Valid()) {
           std::tie(std::ignore, user_key) = ExtractNamespaceKey<std::string>(iter->key(), storage_->IsSlotIdEncoded());
           auto res = std::mismatch(prefix.begin(), prefix.end(), user_key.begin());
-          if (res.first == prefix.end()) {
+          if (res.first == prefix.end() && util::GlobMatches(suffix_glob, user_key.substr(prefix.size()))) {
             keys->emplace_back(user_key);
           }
 
@@ -420,13 +426,13 @@ rocksdb::Status Database::RandomKey(engine::Context &ctx, const std::string &cur
 
   std::string end_cursor;
   std::vector<std::string> keys;
-  auto s = Scan(ctx, cursor, RANDOM_KEY_SCAN_LIMIT, "", &keys, &end_cursor);
+  auto s = Scan(ctx, cursor, RANDOM_KEY_SCAN_LIMIT, "", "*", &keys, &end_cursor);
   if (!s.ok()) {
     return s;
   }
   if (keys.empty() && !cursor.empty()) {
     // if reach the end, restart from beginning
-    s = Scan(ctx, "", RANDOM_KEY_SCAN_LIMIT, "", &keys, &end_cursor);
+    s = Scan(ctx, "", RANDOM_KEY_SCAN_LIMIT, "", "*", &keys, &end_cursor);
     if (!s.ok()) {
       return s;
     }

diff --git a/src/storage/redis_db.h b/src/storage/redis_db.h
@@ -20,7 +20,6 @@
 
 #pragma once
 
-#include <map>
 #include <optional>
 #include <string>
 #include <utility>
@@ -29,7 +28,6 @@
 
 #include "cluster/cluster_defs.h"
 #include "redis_metadata.h"
-#include "server/redis_reply.h"
 #include "storage.h"
 
 namespace redis {
@@ -119,11 +117,12 @@ class Database {
   [[nodiscard]] rocksdb::Status FlushDB(engine::Context &ctx);
   [[nodiscard]] rocksdb::Status FlushAll(engine::Context &ctx);
   [[nodiscard]] rocksdb::Status GetKeyNumStats(engine::Context &ctx, const std::string &prefix, KeyNumStats *stats);
-  [[nodiscard]] rocksdb::Status Keys(engine::Context &ctx, const std::string &prefix,
+  [[nodiscard]] rocksdb::Status Keys(engine::Context &ctx, const std::string &prefix, const std::string &suffix_glob,
                                      std::vector<std::string> *keys = nullptr, KeyNumStats *stats = nullptr);
   [[nodiscard]] rocksdb::Status Scan(engine::Context &ctx, const std::string &cursor, uint64_t limit,
-                                     const std::string &prefix, std::vector<std::string> *keys,
-                                     std::string *end_cursor = nullptr, RedisType type = kRedisNone);
+                                     const std::string &prefix, const std::string &suffix_glob,
+                                     std::vector<std::string> *keys, std::string *end_cursor = nullptr,
+                                     RedisType type = kRedisNone);
   [[nodiscard]] rocksdb::Status RandomKey(engine::Context &ctx, const std::string &cursor, std::string *key);
   std::string AppendNamespacePrefix(const Slice &user_key);
   [[nodiscard]] rocksdb::Status ClearKeysOfSlotRange(engine::Context &ctx, const rocksdb::Slice &ns,