Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for wildcard exclusion in index pattern #4458

Merged
merged 3 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 21 additions & 8 deletions quickwit/quickwit-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,23 @@ pub fn validate_identifier(label: &str, value: &str) -> anyhow::Result<()> {
/// Checks whether an index ID pattern conforms to Quickwit conventions.
/// Index ID patterns accept the same characters as identifiers AND accept `*`
/// chars to allow for glob-like patterns.
pub fn validate_index_id_pattern(pattern: &str) -> anyhow::Result<()> {
pub fn validate_index_id_pattern(pattern: &str, allow_negative: bool) -> anyhow::Result<()> {
static IDENTIFIER_REGEX_WITH_GLOB_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^[a-zA-Z\*][a-zA-Z0-9-_\.\*]{0,254}$")
.expect("regular expression should compile")
});
if !IDENTIFIER_REGEX_WITH_GLOB_PATTERN.is_match(pattern) {
static IDENTIFIER_REGEX_WITH_GLOB_PATTERN_NEGATIVE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^-?[a-zA-Z\*][a-zA-Z0-9-_\.\*]{0,254}$")
.expect("regular expression should compile")
});

let regex = if allow_negative {
&IDENTIFIER_REGEX_WITH_GLOB_PATTERN_NEGATIVE
} else {
&IDENTIFIER_REGEX_WITH_GLOB_PATTERN
};

if !regex.is_match(pattern) {
bail!(
"index ID pattern `{pattern}` is invalid: patterns must match the following regular \
expression: `^[a-zA-Z\\*][a-zA-Z0-9-_\\.\\*]{{0,254}}$`"
Expand Down Expand Up @@ -269,14 +280,16 @@ mod tests {

#[test]
fn test_validate_index_id_pattern() {
validate_index_id_pattern("*").unwrap();
validate_index_id_pattern("abc.*").unwrap();
validate_index_id_pattern("ab").unwrap_err();
validate_index_id_pattern("").unwrap_err();
validate_index_id_pattern("**").unwrap_err();
assert!(validate_index_id_pattern("foo!")
validate_index_id_pattern("*", false).unwrap();
validate_index_id_pattern("abc.*", false).unwrap();
validate_index_id_pattern("ab", false).unwrap_err();
validate_index_id_pattern("", false).unwrap_err();
validate_index_id_pattern("**", false).unwrap_err();
assert!(validate_index_id_pattern("foo!", false)
.unwrap_err()
.to_string()
.contains("index ID pattern `foo!` is invalid:"));
validate_index_id_pattern("-abc", true).unwrap();
validate_index_id_pattern("-abc", false).unwrap_err();
}
}
28 changes: 24 additions & 4 deletions quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -693,8 +693,25 @@ impl MetastoreService for FileBackedMetastore {
// 2) Get each index metadata. Note that each get will take a read lock on
// `per_index_metastores`. Lock is released in 1) to let a concurrent task/thread to
// take a write lock on `per_index_metastores`.
let index_matcher =
build_regex_set_from_patterns(request.index_id_patterns).map_err(|error| {
let mut positive_patterns = Vec::new();
let mut negative_patterns = Vec::new();
for pattern in request.index_id_patterns {
if let Some(negative_pattern) = pattern.strip_prefix('-') {
negative_patterns.push(negative_pattern.to_string());
} else {
positive_patterns.push(pattern);
}
}
let positive_index_matcher =
build_regex_set_from_patterns(positive_patterns).map_err(|error| {
MetastoreError::Internal {
message: "failed to build RegexSet from index patterns`".to_string(),
cause: error.to_string(),
}
})?;

let negative_index_matcher =
build_regex_set_from_patterns(negative_patterns).map_err(|error| {
MetastoreError::Internal {
message: "failed to build RegexSet from index patterns`".to_string(),
cause: error.to_string(),
Expand All @@ -709,7 +726,10 @@ impl MetastoreService for FileBackedMetastore {
IndexState::Alive(_) => Some(index_id),
_ => None,
})
.filter(|index_id| index_matcher.is_match(index_id))
.filter(|index_id| {
positive_index_matcher.is_match(index_id)
&& !negative_index_matcher.is_match(index_id)
})
.cloned()
.collect()
};
Expand Down Expand Up @@ -951,7 +971,7 @@ fn build_regex_set_from_patterns(patterns: Vec<String>) -> anyhow::Result<RegexS
/// Converts the tokens into a valid regex.
fn build_regex_exprs_from_pattern(index_pattern: &str) -> anyhow::Result<String> {
// Note: consecutive '*' are not allowed in the pattern.
validate_index_id_pattern(index_pattern)?;
validate_index_id_pattern(index_pattern, false)?;
let mut re: String = String::new();
re.push('^');
re.push_str(&index_pattern.split('*').map(regex::escape).join(".*"));
Expand Down
80 changes: 67 additions & 13 deletions quickwit/quickwit-metastore/src/metastore/postgres/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1401,32 +1401,67 @@ fn tags_filter_expression_helper(tags: &TagFilterAst) -> Cond {
}

/// Builds a SQL query that returns indexes which match at least one pattern in
/// `index_id_patterns`. For each pattern, we check if the pattern is valid and replace `*` by `%`
/// `index_id_patterns`, and none of the patterns starting with '-'
///
/// For each pattern, we check if the pattern is valid and replace `*` by `%`
/// to build a SQL `LIKE` query.
fn build_index_id_patterns_sql_query(index_id_patterns: &[String]) -> anyhow::Result<String> {
if index_id_patterns.is_empty() {
let mut positive_patterns = Vec::new();
let mut negative_patterns = Vec::new();
for pattern in index_id_patterns {
if let Some(negative_pattern) = pattern.strip_prefix('-') {
negative_patterns.push(negative_pattern.to_string());
} else {
positive_patterns.push(pattern);
}
}

if positive_patterns.is_empty() {
anyhow::bail!("The list of index id patterns may not be empty.");
}
if index_id_patterns.iter().any(|pattern| pattern == "*") {

if index_id_patterns.iter().any(|pattern| pattern == "*") && negative_patterns.is_empty() {
return Ok("SELECT * FROM indexes".to_string());
}

let mut where_like_query = String::new();
for (index_id_pattern_idx, index_id_pattern) in index_id_patterns.iter().enumerate() {
validate_index_id_pattern(index_id_pattern).map_err(|error| MetastoreError::Internal {
message: "failed to build list indexes query".to_string(),
cause: error.to_string(),
for (index_id_pattern_idx, index_id_pattern) in positive_patterns.iter().enumerate() {
validate_index_id_pattern(index_id_pattern, false).map_err(|error| {
MetastoreError::Internal {
message: "failed to build list indexes query".to_string(),
cause: error.to_string(),
}
})?;
if index_id_pattern_idx != 0 {
where_like_query.push_str(" OR ");
}
if index_id_pattern.contains('*') {
let sql_pattern = index_id_pattern.replace('*', "%");
let _ = write!(where_like_query, "index_id LIKE '{sql_pattern}'");
} else {
let _ = write!(where_like_query, "index_id = '{index_id_pattern}'");
}
if index_id_pattern_idx < index_id_patterns.len() - 1 {
where_like_query.push_str(" OR ");
}
let mut negative_like_query = String::new();
for index_id_pattern in negative_patterns.iter() {
validate_index_id_pattern(index_id_pattern, false).map_err(|error| {
MetastoreError::Internal {
message: "failed to build list indexes query".to_string(),
cause: error.to_string(),
}
})?;
negative_like_query.push_str(" AND ");
if index_id_pattern.contains('*') {
let sql_pattern = index_id_pattern.replace('*', "%");
let _ = write!(negative_like_query, "index_id NOT LIKE '{sql_pattern}'");
} else {
let _ = write!(negative_like_query, "index_id <> '{index_id_pattern}'");
}
}
Ok(format!("SELECT * FROM indexes WHERE {where_like_query}"))

Ok(format!(
"SELECT * FROM indexes WHERE ({where_like_query}){negative_like_query}"
))
}

/// A postgres metastore factory
Expand Down Expand Up @@ -1868,16 +1903,16 @@ mod tests {
fn test_index_id_pattern_like_query() {
assert_eq!(
&build_index_id_patterns_sql_query(&["*-index-*-last*".to_string()]).unwrap(),
"SELECT * FROM indexes WHERE index_id LIKE '%-index-%-last%'"
"SELECT * FROM indexes WHERE (index_id LIKE '%-index-%-last%')"
);
assert_eq!(
&build_index_id_patterns_sql_query(&[
"*-index-*-last*".to_string(),
"another-index".to_string()
])
.unwrap(),
"SELECT * FROM indexes WHERE index_id LIKE '%-index-%-last%' OR index_id = \
'another-index'"
"SELECT * FROM indexes WHERE (index_id LIKE '%-index-%-last%' OR index_id = \
'another-index')"
);
assert_eq!(
&build_index_id_patterns_sql_query(&[
Expand All @@ -1896,5 +1931,24 @@ mod tests {
`*-index-*-&-last**` is invalid: patterns must match the following regular \
expression: `^[a-zA-Z\\*][a-zA-Z0-9-_\\.\\*]{0,254}$``"
);

assert_eq!(
&build_index_id_patterns_sql_query(&["*".to_string(), "-index-name".to_string()])
.unwrap(),
"SELECT * FROM indexes WHERE (index_id LIKE '%') AND index_id <> 'index-name'"
);

assert_eq!(
&build_index_id_patterns_sql_query(&[
"*-index-*-last*".to_string(),
"another-index".to_string(),
"-*-index-1-last*".to_string(),
"-index-2-last".to_string(),
])
.unwrap(),
"SELECT * FROM indexes WHERE (index_id LIKE '%-index-%-last%' OR index_id = \
'another-index') AND index_id NOT LIKE '%-index-1-last%' AND index_id <> \
'index-2-last'"
);
}
}
2 changes: 1 addition & 1 deletion quickwit/quickwit-opentelemetry/src/otlp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ pub fn extract_otel_traces_index_id_patterns_from_metadata(
if index_id_pattern.is_empty() {
continue;
}
validate_index_id_pattern(index_id_pattern).map_err(|error| {
validate_index_id_pattern(index_id_pattern, true).map_err(|error| {
Status::internal(format!(
"invalid index ID pattern in request header: {error}",
))
Expand Down
3 changes: 3 additions & 0 deletions quickwit/quickwit-proto/protos/quickwit/metastore.proto
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ message CreateIndexResponse {

message ListIndexesMetadataRequest {
reserved 1;
// List of patterns an index should match or not match to get considered
// An index must match at least one positive pattern (a pattern not starting
// with a '-'), and no negative pattern (a pattern starting with a '-').
repeated string index_id_patterns = 2;
}

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion quickwit/quickwit-search/src/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ pub fn check_all_index_metadata_found(
let mut index_ids: HashSet<&str> = index_id_patterns
.iter()
.map(|index_ptn| index_ptn.as_str())
.filter(|index_ptn| !index_ptn.contains('*'))
.filter(|index_ptn| !index_ptn.contains('*') && !index_ptn.starts_with('-'))
.collect();

if index_ids.is_empty() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ async fn es_compat_index_multi_search(
)));
}
for index in &request_header.index {
validate_index_id_pattern(index).map_err(|err| {
validate_index_id_pattern(index, true).map_err(|err| {
SearchError::InvalidArgument(format!(
"request header contains an invalid index: {}",
err
Expand Down
2 changes: 1 addition & 1 deletion quickwit/quickwit-serve/src/search_api/rest_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ pub(crate) async fn extract_index_id_patterns(
let mut index_id_patterns = Vec::new();

for index_id_pattern in percent_decoded_comma_separated_index_id_patterns.split(',') {
validate_index_id_pattern(index_id_pattern)
validate_index_id_pattern(index_id_pattern, true)
.map_err(|error| crate::rest::InvalidArgument(error.to_string()))?;
index_id_patterns.push(index_id_pattern.to_string());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,28 @@ expected:
login: fulmicoton
_index: "gharchive-1"
---
endpoint: "gharchive-*,-gharchive-2/_search"
params:
q: "*"
expected:
hits:
total:
value: 2
relation: "eq"
hits:
$expect: "len(val) == 2"
---
endpoint: "gharchive-*,-*-2/_search"
params:
q: "*"
expected:
hits:
total:
value: 2
relation: "eq"
hits:
$expect: "len(val) == 2"
---
# It is valid to have a pattern that does not match
# any index.
endpoint: "invalidptn-*/_search"
Expand Down
Loading