Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add regexp_escape function
Browse files Browse the repository at this point in the history
chrisiou committed Dec 14, 2023
1 parent 9f5e075 commit 42bbce9
Showing 6 changed files with 129 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/core_functions/function_list.cpp
Original file line number Diff line number Diff line change
@@ -276,6 +276,7 @@ static StaticFunctionDefinition internal_functions[] = {
DUCKDB_SCALAR_FUNCTION(RadiansFun),
DUCKDB_SCALAR_FUNCTION(RandomFun),
DUCKDB_SCALAR_FUNCTION_SET(ListRangeFun),
DUCKDB_SCALAR_FUNCTION(RegexpEscapeFun),
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(RegexpSplitToArrayFun),
DUCKDB_AGGREGATE_FUNCTION(RegrAvgxFun),
DUCKDB_AGGREGATE_FUNCTION(RegrAvgyFun),
1 change: 1 addition & 0 deletions src/core_functions/scalar/string/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@ add_library_unity(
md5.cpp
pad.cpp
printf.cpp
regexp_escape.cpp
repeat.cpp
replace.cpp
reverse.cpp
8 changes: 8 additions & 0 deletions src/core_functions/scalar/string/functions.json
Original file line number Diff line number Diff line change
@@ -304,5 +304,13 @@
"description": "Converts a value to a string in the given base radix, optionally padding with leading zeros to the minimum length",
"example": "to_base(42, 16)",
"type": "scalar_function_set"
},
{
"name": "regexp_escape",
"parameters": "string",
"description": "Return the input string with all the regular expression metacharacters escaped",
"example": "regexp_escape('https://duckdb.org')",
"struct": "RegexpEscapeFun",
"type": "scalar_function"
}
]
35 changes: 35 additions & 0 deletions src/core_functions/scalar/string/regexp_escape.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include "duckdb/core_functions/scalar/string_functions.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/vector_operations/unary_executor.hpp"

namespace duckdb {

// Builds a backslash-escaped copy of `str` inside `escaped_pattern` and returns a string_t
// constructed from that buffer.
//
// Each of the characters ()[]{}?*+-|^$\.&~# is prefixed with a single backslash; every other
// byte is copied through unchanged. `escaped_pattern` is cleared on entry so that callers can
// reuse one buffer across calls.
//
// NOTE(review): the result is constructed from escaped_pattern's storage, so presumably the
// buffer has to outlive the returned string_t -- confirm against string_t's ownership rules.
static string_t escape(const string_t &str, vector<char> &escaped_pattern) {
	auto data = str.GetData();
	auto length = str.GetSize();

	escaped_pattern.clear();
	// at most one backslash is added per input byte, so twice the input size is always enough
	escaped_pattern.reserve(2 * length);

	for (idx_t pos = 0; pos < length; ++pos) {
		char current = data[pos];
		switch (current) {
		case '(':
		case ')':
		case '[':
		case ']':
		case '{':
		case '}':
		case '?':
		case '*':
		case '+':
		case '-':
		case '|':
		case '^':
		case '$':
		case '\\':
		case '.':
		case '&':
		case '~':
		case '#':
			// special character: emit the escaping backslash first
			escaped_pattern.push_back('\\');
			break;
		default:
			break;
		}
		escaped_pattern.push_back(current);
	}
	return string_t(escaped_pattern.data(), escaped_pattern.size());
}

// Scalar implementation of regexp_escape(VARCHAR) -> VARCHAR.
//
// args:   chunk whose first column holds the strings to escape
// state:  unused
// result: receives one escaped string per input row
//
// Every row of the chunk is escaped individually. The previous implementation only read row 0
// and referenced that single value as the whole result, so multi-row inputs all received the
// first row's answer and a NULL input was escaped as text.
static void RegexpEscapeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
	// scratch buffer reused for every row; escape() clears it before writing
	vector<char> escaped_pattern;
	UnaryExecutor::Execute<string_t, string_t>(args.data[0], result, args.size(), [&](string_t input) {
		// copy the escaped bytes into the result vector so they do not depend on the scratch buffer,
		// which is overwritten by the next row and destroyed when this function returns
		return StringVector::AddString(result, escape(input, escaped_pattern));
	});
}

// Describes regexp_escape as a scalar function taking one VARCHAR argument and returning a
// VARCHAR, implemented by RegexpEscapeFunction.
ScalarFunction RegexpEscapeFun::GetFunction() {
	ScalarFunction escape_function({LogicalType::VARCHAR}, LogicalType::VARCHAR, RegexpEscapeFunction);
	return escape_function;
}

} // namespace duckdb
9 changes: 9 additions & 0 deletions src/include/duckdb/core_functions/scalar/string_functions.hpp
Original file line number Diff line number Diff line change
@@ -480,4 +480,13 @@ struct ToBaseFun {
static ScalarFunctionSet GetFunctions();
};

// Descriptor for the regexp_escape scalar function: its SQL name, parameter list,
// human-readable description and usage example, plus the factory for the function itself.
struct RegexpEscapeFun {
// Name under which the function is exposed.
static constexpr const char *Name = "regexp_escape";
// Parameter names of the function.
static constexpr const char *Parameters = "string";
// One-line summary of what the function does.
static constexpr const char *Description = "Return the input string with all the regular expression metacharacters escaped";
// Sample invocation.
static constexpr const char *Example = "regexp_escape('https://duckdb.org')";

// Returns the ScalarFunction for regexp_escape (defined in regexp_escape.cpp).
static ScalarFunction GetFunction();
};

} // namespace duckdb
75 changes: 75 additions & 0 deletions test/sql/function/string/regex_escape.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# name: test/sql/function/string/regex_escape.test
# description: regex escape test
# group: [string]

statement ok
PRAGMA enable_verification

# test the example
query T
SELECT regexp_escape('https://duckdb.org');
----
https://duckdb\.org

# no special chars in the input
query T
SELECT regexp_escape('abc');
----
abc

# metacharacters
query T
SELECT regexp_escape('a.b');
----
a\.b

query T
SELECT regexp_escape('()[]{}*+?|^$');
----
\(\)\[\]\{\}\*\+\?\|\^\$

query T
SELECT regexp_escape('a.b[c]*');
----
a\.b\[c\]\*

# a pair of double quotes (not an empty string): double quotes are not special characters
query T
SELECT regexp_escape('""');
----
""

# whitespaces
query T
SELECT regexp_escape('a b c');
----
a b c

query T
SELECT regexp_escape('line1\nline2');
----
line1\\nline2

# case sensitive
query T
SELECT regexp_escape('CaseSensitive');
----
CaseSensitive

# non-special punctuation character
query T
SELECT regexp_escape('@');
----
@

# backslashes
query T
SELECT regexp_escape('path\to\wonderland');
----
path\\to\\wonderland

# escape all special characters
query T
SELECT regexp_escape('$()*+.?[\]^{|}-');
----
\$\(\)\*\+\.\?\[\\\]\^\{\|\}\-

0 comments on commit 42bbce9

Please sign in to comment.