From 12f1ea2309ff63345e78ec89236286b5a425da73 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Tue, 12 Aug 2025 11:53:12 +0100 Subject: [PATCH 01/15] api sketch --- src/action.rs | 1 + src/action/search.rs | 305 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 306 insertions(+) create mode 100644 src/action/search.rs diff --git a/src/action.rs b/src/action.rs index 91ebc6fd4..a5ed56b04 100644 --- a/src/action.rs +++ b/src/action.rs @@ -23,6 +23,7 @@ mod list_indexes; mod perf; mod replace_one; mod run_command; +mod search; mod search_index; mod session; mod shutdown; diff --git a/src/action/search.rs b/src/action/search.rs new file mode 100644 index 000000000..f38f02630 --- /dev/null +++ b/src/action/search.rs @@ -0,0 +1,305 @@ +use std::marker::PhantomData; + +use crate::bson::{doc, Bson, Document}; + +pub struct AtlasSearch { + name: &'static str, + stage: Document, + _t: PhantomData, +} + +impl Into for AtlasSearch { + fn into(self) -> Document { + doc! { + "$search": { + self.name: self.stage + } + } + } +} + +pub struct Autocomplete; +pub struct Compound; +pub struct Text; +pub struct Built; + +impl AtlasSearch { + pub fn build(self) -> AtlasSearch { + AtlasSearch { + name: self.name, + stage: self.stage, + _t: PhantomData::default(), + } + } + + pub fn on_index(self, index: impl AsRef) -> Document { + let mut out: Document = self.into(); + // unwrap safety: AtlasSearch::into always produces a "$search" value + out.get_document_mut("$search") + .unwrap() + .insert("index", index.as_ref()); + out + } +} + +impl IntoIterator for AtlasSearch { + type Item = AtlasSearch; + + type IntoIter = std::iter::Once>; + + fn into_iter(self) -> Self::IntoIter { + std::iter::once(self) + } +} + +impl AtlasSearch { + /// Perform a search for a word or phrase that contains a sequence of characters from an + /// incomplete input string. + pub fn autocomplete(query: impl StringOrArray, path: impl AsRef) -> Self { + AtlasSearch { + name: "autocomplete", + stage: doc! { + "query": query.to_bson(), + "path": path.as_ref(), + }, + _t: PhantomData::default(), + } + } + + /// Enable fuzzy search. Find strings which are similar to the search term or terms. + pub fn fuzzy(mut self, options: Document) -> Self { + self.stage.insert("fuzzy", options); + self + } + + /// Score to assign to the matching search term results. + pub fn score(mut self, options: Document) -> Self { + self.stage.insert("score", options); + self + } + + /// Order in which to search for tokens. + pub fn token_order(mut self, order: TokenOrder) -> Self { + self.stage.insert("tokenOrder", order.name()); + self + } +} + +/// Order in which to search for tokens. +pub enum TokenOrder { + /// Indicates tokens in the query can appear in any order in the documents. + Any, + /// Indicates tokens in the query must appear adjacent to each other or in the order specified + /// in the query in the documents. + Sequential, +} + +impl TokenOrder { + fn name(&self) -> &'static str { + match self { + Self::Any => "any", + Self::Sequential => "sequential", + } + } +} + +impl AtlasSearch { + pub fn compound() -> Self { + AtlasSearch { + name: "compound", + stage: doc! {}, + _t: PhantomData::default(), + } + } + + pub fn must(mut self, clauses: impl IntoIterator>) -> Self { + self.stage.insert( + "must", + clauses.into_iter().map(|sq| sq.stage).collect::>(), + ); + self + } + + pub fn must_not(mut self, clauses: impl IntoIterator>) -> Self { + self.stage.insert( + "mustNot", + clauses.into_iter().map(|sq| sq.stage).collect::>(), + ); + self + } + + pub fn should(mut self, clauses: impl IntoIterator>) -> Self { + self.stage.insert( + "should", + clauses.into_iter().map(|sq| sq.stage).collect::>(), + ); + self + } +} + +impl AtlasSearch { + pub fn text(query: impl StringOrArray, path: impl StringOrArray) -> Self { + AtlasSearch { + name: "text", + stage: doc! { + "query": query.to_bson(), + "path": path.to_bson(), + }, + _t: PhantomData::default(), + } + } + + pub fn fuzzy(mut self, options: Document) -> Self { + self.stage.insert("fuzzy", options); + self + } + + pub fn match_criteria(mut self, criteria: MatchCriteria) -> Self { + self.stage.insert("matchCriteria", criteria.name()); + self + } +} + +pub enum MatchCriteria { + Any, + All, +} + +impl MatchCriteria { + fn name(&self) -> &'static str { + match self { + Self::Any => "any", + Self::All => "all", + } + } +} + +pub trait StringOrArray { + fn to_bson(self) -> Bson; +} + +impl StringOrArray for &str { + fn to_bson(self) -> Bson { + Bson::String(self.to_owned()) + } +} + +impl StringOrArray for String { + fn to_bson(self) -> Bson { + Bson::String(self) + } +} + +impl StringOrArray for &String { + fn to_bson(self) -> Bson { + Bson::String(self.clone()) + } +} + +impl StringOrArray for &[&str] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|&s| Bson::String(s.to_owned())).collect()) + } +} + +impl StringOrArray for &[&str; N] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|&s| Bson::String(s.to_owned())).collect()) + } +} + +impl StringOrArray for &[String] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|s| Bson::String(s.clone())).collect()) + } +} + +impl StringOrArray for &[String; N] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|s| Bson::String(s.clone())).collect()) + } +} + +impl StringOrArray for [String; N] { + fn to_bson(self) -> Bson { + Bson::Array(self.into_iter().map(Bson::String).collect()) + } +} + +impl StringOrArray for &[&String] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|&s| Bson::String(s.clone())).collect()) + } +} + +impl StringOrArray for &[&String; N] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|&s| Bson::String(s.clone())).collect()) + } +} + +impl StringOrArray for Vec<&str> { + fn to_bson(self) -> Bson { + Bson::Array( + self.into_iter() + .map(|s| Bson::String(s.to_owned())) + .collect(), + ) + } +} + +impl StringOrArray for Vec { + fn to_bson(self) -> Bson { + Bson::Array(self.into_iter().map(Bson::String).collect()) + } +} + +impl StringOrArray for Vec<&String> { + fn to_bson(self) -> Bson { + Bson::Array(self.into_iter().map(|s| Bson::String(s.clone())).collect()) + } +} + +#[test] +fn api_flow() { + let coll: crate::Collection = todo!(); + #[allow(unreachable_code)] + { + let _ = coll.aggregate(vec![ + AtlasSearch::autocomplete("pre", "title") + .fuzzy(doc! { "maxEdits": 1, "prefixLength": 1, "maxExpansions": 256 }) + .into(), + doc! { + "$limit": 10, + }, + doc! { + "$project": { + "_id": 0, + "title": 1, + } + }, + ]); + let _ = coll.aggregate(vec![ + AtlasSearch::text("baseball", "plot").into(), + doc! { "$limit": 3 }, + doc! { + "$project": { + "_id": 0, + "title": 1, + "plot": 1, + } + }, + ]); + let _ = coll.aggregate(vec![ + AtlasSearch::compound() + .must(AtlasSearch::text("varieties", "description")) + .should(AtlasSearch::text("Fuji", "description")) + .into(), + doc! { + "$project": { + "score": { "$meta": "searchScore" } + } + }, + ]); + } +} From 86c0f674e942b5e1bbed5d16f675d947c9c6dd01 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Thu, 14 Aug 2025 11:44:00 +0100 Subject: [PATCH 02/15] skeleton and yaml sync --- etc/gen_atlas_search/.gitignore | 1 + etc/gen_atlas_search/Cargo.lock | 119 +++++++++++ etc/gen_atlas_search/Cargo.toml | 8 + etc/gen_atlas_search/src/main.rs | 27 +++ etc/gen_atlas_search/update-yaml.sh | 17 ++ .../yaml/search/autocomplete.yaml | 152 ++++++++++++++ .../yaml/search/compound.yaml | 156 ++++++++++++++ .../yaml/search/embeddedDocument.yaml | 155 ++++++++++++++ etc/gen_atlas_search/yaml/search/equals.yaml | 104 ++++++++++ etc/gen_atlas_search/yaml/search/exists.yaml | 56 +++++ etc/gen_atlas_search/yaml/search/facet.yaml | 56 +++++ .../yaml/search/geoShape.yaml | 123 +++++++++++ .../yaml/search/geoWithin.yaml | 103 ++++++++++ etc/gen_atlas_search/yaml/search/in.yaml | 89 ++++++++ .../yaml/search/moreLikeThis.yaml | 99 +++++++++ etc/gen_atlas_search/yaml/search/near.yaml | 124 +++++++++++ etc/gen_atlas_search/yaml/search/phrase.yaml | 109 ++++++++++ .../yaml/search/queryString.yaml | 35 ++++ etc/gen_atlas_search/yaml/search/range.yaml | 139 +++++++++++++ etc/gen_atlas_search/yaml/search/regex.yaml | 42 ++++ etc/gen_atlas_search/yaml/search/text.yaml | 194 ++++++++++++++++++ .../yaml/search/wildcard.yaml | 60 ++++++ 22 files changed, 1968 insertions(+) create mode 100644 etc/gen_atlas_search/.gitignore create mode 100644 etc/gen_atlas_search/Cargo.lock create mode 100644 etc/gen_atlas_search/Cargo.toml create mode 100644 etc/gen_atlas_search/src/main.rs create mode 100755 etc/gen_atlas_search/update-yaml.sh create mode 100644 etc/gen_atlas_search/yaml/search/autocomplete.yaml create mode 100644 etc/gen_atlas_search/yaml/search/compound.yaml create mode 100644 etc/gen_atlas_search/yaml/search/embeddedDocument.yaml create mode 100644 etc/gen_atlas_search/yaml/search/equals.yaml create mode 100644 etc/gen_atlas_search/yaml/search/exists.yaml create mode 100644 etc/gen_atlas_search/yaml/search/facet.yaml create mode 100644 etc/gen_atlas_search/yaml/search/geoShape.yaml create mode 100644 etc/gen_atlas_search/yaml/search/geoWithin.yaml create mode 100644 etc/gen_atlas_search/yaml/search/in.yaml create mode 100644 etc/gen_atlas_search/yaml/search/moreLikeThis.yaml create mode 100644 etc/gen_atlas_search/yaml/search/near.yaml create mode 100644 etc/gen_atlas_search/yaml/search/phrase.yaml create mode 100644 etc/gen_atlas_search/yaml/search/queryString.yaml create mode 100644 etc/gen_atlas_search/yaml/search/range.yaml create mode 100644 etc/gen_atlas_search/yaml/search/regex.yaml create mode 100644 etc/gen_atlas_search/yaml/search/text.yaml create mode 100644 etc/gen_atlas_search/yaml/search/wildcard.yaml diff --git a/etc/gen_atlas_search/.gitignore b/etc/gen_atlas_search/.gitignore new file mode 100644 index 000000000..eb5a316cb --- /dev/null +++ b/etc/gen_atlas_search/.gitignore @@ -0,0 +1 @@ +target diff --git a/etc/gen_atlas_search/Cargo.lock b/etc/gen_atlas_search/Cargo.lock new file mode 100644 index 000000000..4d93cdd47 --- /dev/null +++ b/etc/gen_atlas_search/Cargo.lock @@ -0,0 +1,119 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "gen_atlas_search" +version = "0.1.0" +dependencies = [ + "serde", + "serde_yaml", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "proc-macro2" +version = "1.0.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "syn" +version = "2.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bc3fcb250e53458e712715cf74285c1f889686520d79294a9ef3bd7aa1fc619" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" diff --git a/etc/gen_atlas_search/Cargo.toml b/etc/gen_atlas_search/Cargo.toml new file mode 100644 index 000000000..c7a240688 --- /dev/null +++ b/etc/gen_atlas_search/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "gen_atlas_search" +version = "0.1.0" +edition = "2024" + +[dependencies] +serde = { version = "1.0.219", features = ["derive"] } +serde_yaml = "0.9.34" diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs new file mode 100644 index 000000000..ece2285e3 --- /dev/null +++ b/etc/gen_atlas_search/src/main.rs @@ -0,0 +1,27 @@ +use serde::Deserialize; + +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +struct Operator { + name: String, + link: String, + #[serde(rename = "type")] + type_: Vec, + encode: EncodeType, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +enum OperatorType { + SearchOperator, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +enum EncodeType { + Object, +} + +fn main() { + println!("Hello, world!"); +} diff --git a/etc/gen_atlas_search/update-yaml.sh b/etc/gen_atlas_search/update-yaml.sh new file mode 100755 index 000000000..c2174721b --- /dev/null +++ b/etc/gen_atlas_search/update-yaml.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -o errexit +set -x + +basedir=$(dirname $(realpath $0)) +tmpdir=$(mktemp -d) + +git clone --sparse --depth 1 "https://github.com/mongodb/mongo-php-library.git" "${tmpdir}" +cd "${tmpdir}" +git sparse-checkout add generator/config/search + +cd "${basedir}" +mkdir -p "yaml/search" +rsync -ah "${tmpdir}/generator/config/search/" "yaml/search" --delete + +#rm -rf "${tmpdir}" \ No newline at end of file diff --git a/etc/gen_atlas_search/yaml/search/autocomplete.yaml b/etc/gen_atlas_search/yaml/search/autocomplete.yaml new file mode 100644 index 000000000..a984b9a39 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/autocomplete.yaml @@ -0,0 +1,152 @@ +# $schema: ../schema.json +name: autocomplete +link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/' +type: + - searchOperator +encode: object +description: | + The autocomplete operator performs a search for a word or phrase that + contains a sequence of characters from an incomplete input string. The + fields that you intend to query with the autocomplete operator must be + indexed with the autocomplete data type in the collection's index definition. +arguments: + - + name: path + type: + - searchPath + - + name: query + type: + - string + - + name: tokenOrder + optional: true + type: + - string # any|sequential + - + name: fuzzy + optional: true + type: + - object + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Basic' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#basic-example' + pipeline: + - + $search: + autocomplete: + query: 'off' + path: 'title' + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + + - + name: 'Fuzzy' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#fuzzy-example' + pipeline: + - + $search: + autocomplete: + query: 'pre' + path: 'title' + fuzzy: + maxEdits: 1 + prefixLength: 1 + maxExpansions: 256 + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + + - + name: 'Token Order any' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#simple-any-example' + pipeline: + - + $search: + autocomplete: + query: 'men with' + path: 'title' + tokenOrder: 'any' + - + $limit: 4 + - + $project: + _id: 0 + title: 1 + + - + name: 'Token Order sequential' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#simple-sequential-example' + pipeline: + - + $search: + autocomplete: + query: 'men with' + path: 'title' + tokenOrder: 'sequential' + - + $limit: 4 + - + $project: + _id: 0 + title: 1 + + - + name: 'Highlighting' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#highlighting-example' + pipeline: + - + $search: + autocomplete: + query: 'ger' + path: 'title' + highlight: + path: 'title' + - + $limit: 5 + - + $project: + score: + $meta: 'searchScore' + _id: 0 + title: 1 + highlights: + $meta: 'searchHighlights' + + - + name: 'Across Multiple Fields' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#search-across-multiple-fields' + pipeline: + - + $search: + compound: + should: + - + autocomplete: + query: 'inter' + path: 'title' + - + autocomplete: + query: 'inter' + path: 'plot' + minimumShouldMatch: 1 + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + plot: 1 diff --git a/etc/gen_atlas_search/yaml/search/compound.yaml b/etc/gen_atlas_search/yaml/search/compound.yaml new file mode 100644 index 000000000..7a1d9f419 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/compound.yaml @@ -0,0 +1,156 @@ +# $schema: ../schema.json +name: compound +link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/' +type: + - searchOperator +encode: object +description: | + The compound operator combines two or more operators into a single query. + Each element of a compound query is called a clause, and each clause + consists of one or more sub-queries. +arguments: + - + name: must + optional: true + type: + - searchOperator + - array # of searchOperator + - + name: mustNot + optional: true + type: + - searchOperator + - array # of searchOperator + - + name: should + optional: true + type: + - searchOperator + - array # of searchOperator + - + name: filter + optional: true + type: + - searchOperator + - array # of searchOperator + - + name: minimumShouldMatch + optional: true + type: + - int + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'must and mustNot' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/#must-and-mustnot-example' + pipeline: + - + $search: + compound: + must: + - + text: + query: 'varieties' + path: 'description' + mustNot: + - + text: + query: 'apples' + path: 'description' + + - + name: 'must and should' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/#must-and-should-example' + pipeline: + - + $search: + compound: + must: + - + text: + query: 'varieties' + path: 'description' + should: + - + text: + query: 'Fuji' + path: 'description' + - + $project: + score: + $meta: 'searchScore' + + - + name: 'minimumShouldMatch' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/#minimumshouldmatch-example' + pipeline: + - + $search: + compound: + must: + - + text: + query: 'varieties' + path: 'description' + should: + - + text: + query: 'Fuji' + path: 'description' + - + text: + query: 'Golden Delicious' + path: 'description' + minimumShouldMatch: 1 + + - + name: 'Filter' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/#filter-examples' + pipeline: + - + $search: + compound: + must: + - + text: + query: 'varieties' + path: 'description' + should: + - + text: + query: 'banana' + path: 'description' + filter: + - + text: + query: 'granny' + path: 'description' + + - + name: 'Nested' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/#nested-example' + pipeline: + - + $search: + compound: + should: + - + text: + query: 'apple' + path: 'type' + - + compound: + must: + - + text: + query: 'organic' + path: 'category' + - + equals: + value: true + path: 'in_stock' + minimumShouldMatch: 1 diff --git a/etc/gen_atlas_search/yaml/search/embeddedDocument.yaml b/etc/gen_atlas_search/yaml/search/embeddedDocument.yaml new file mode 100644 index 000000000..19c804625 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/embeddedDocument.yaml @@ -0,0 +1,155 @@ +# $schema: ../schema.json +name: embeddedDocument +link: 'https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/' +type: + - searchOperator +encode: object +description: | + The embeddedDocument operator is similar to $elemMatch operator. + It constrains multiple query predicates to be satisfied from a single + element of an array of embedded documents. embeddedDocument can be used only + for queries over fields of the embeddedDocuments +arguments: + - + name: path + type: + - searchPath + - + name: operator + type: + - searchOperator + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Basic' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/#index-definition' + pipeline: + - + $search: + embeddedDocument: + path: 'items' + operator: + compound: + must: + - + text: + path: 'items.tags' + query: 'school' + should: + - + text: + path: 'items.name' + query: 'backpack' + score: + embedded: + aggregate: 'mean' + - + $limit: 5 + - + $project: + _id: 0 + items.name: 1 + items.tags: 1 + score: + $meta: 'searchScore' + + - + name: 'Facet' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/#facet-query' + pipeline: + - + $searchMeta: + facet: + operator: + embeddedDocument: + path: 'items' + operator: + compound: + must: + - + text: + path: 'items.tags' + query: 'school' + should: + - + text: + path: 'items.name' + query: 'backpack' + facets: + purchaseMethodFacet: + type: 'string' + path: 'purchaseMethod' + + - + name: 'Query and Sort' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/#query-and-sort' + pipeline: + - + $search: + embeddedDocument: + path: 'items' + operator: + text: + path: 'items.name' + query: 'laptop' + sort: + items.tags: 1 + - + $limit: 5 + - + $project: + _id: 0 + items.name: 1 + items.tags: 1 + score: + $meta: 'searchScore' + + - + name: 'Query for Matching Embedded Documents Only' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/#query-for-matching-embedded-documents-only' + pipeline: + - + $search: + embeddedDocument: + path: 'items' + operator: + compound: + must: + - + range: + path: 'items.quantity' + gt: 2 + - + exists: + path: 'items.price' + - + text: + path: 'items.tags' + query: 'school' + - + $limit: 2 + - + $project: + _id: 0 + storeLocation: 1 + items: + $filter: + input: '$items' + cond: + $and: + - + $ifNull: + - '$$this.price' + - 'false' + - + $gt: + - '$$this.quantity' + - 2 + - + $in: + - 'office' + - '$$this.tags' diff --git a/etc/gen_atlas_search/yaml/search/equals.yaml b/etc/gen_atlas_search/yaml/search/equals.yaml new file mode 100644 index 000000000..b3e50c641 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/equals.yaml @@ -0,0 +1,104 @@ +# $schema: ../schema.json +name: equals +link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/' +type: + - searchOperator +encode: object +description: | + The equals operator checks whether a field matches a value you specify. +arguments: + - + name: path + type: + - searchPath + - + name: value + type: + - binData + - bool + - date + - objectId + - 'null' + - number + - string + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Boolean' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#boolean-examples' + pipeline: + - + $search: + equals: + path: 'verified_user' + value: true + - + $project: + name: 1 + _id: 0 + score: + $meta: 'searchScore' + + - + name: 'ObjectId' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#objectid-example' + pipeline: + - + $search: + equals: + path: 'teammates' + value: !bson_objectId '5a9427648b0beebeb69589a1' + + - + name: 'Date' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#date-example' + pipeline: + - + $search: + equals: + path: 'account_created' + value: !bson_utcdatetime '2022-05-04T05:01:08.000+00:00' + + - + name: 'Number' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#number-example' + pipeline: + - + $search: + equals: + path: 'employee_number' + value: 259 + + - + name: 'String' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#string-example' + pipeline: + - + $search: + equals: + path: 'name' + value: 'jim hall' + + - + name: 'UUID' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#uuid-example' + pipeline: + - + $search: + equals: + path: 'uuid' + value: !bson_uuid 'fac32260-b511-4c69-8485-a2be5b7dda9e' + + - + name: 'Null' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#null-example' + pipeline: + - + $search: + equals: + path: 'job_title' + value: ~ diff --git a/etc/gen_atlas_search/yaml/search/exists.yaml b/etc/gen_atlas_search/yaml/search/exists.yaml new file mode 100644 index 000000000..062e8ba59 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/exists.yaml @@ -0,0 +1,56 @@ +# $schema: ../schema.json +name: exists +link: 'https://www.mongodb.com/docs/atlas/atlas-search/exists/' +type: + - searchOperator +encode: object +description: | + The exists operator tests if a path to a specified indexed field name exists in a document. +arguments: + - + name: path + type: + - searchPath + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Basic' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/exists/#basic-example' + pipeline: + - + $search: + exists: + path: 'type' + + - + name: 'Embedded' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/exists/#embedded-example' + pipeline: + - + $search: + exists: + path: 'quantities.lemons' + + - + name: 'Compound' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/exists/#compound-example' + pipeline: + - + $search: + compound: + must: + - + exists: + path: 'type' + - + text: + query: 'apple' + path: 'type' + should: + text: + query: 'fuji' + path: 'description' diff --git a/etc/gen_atlas_search/yaml/search/facet.yaml b/etc/gen_atlas_search/yaml/search/facet.yaml new file mode 100644 index 000000000..53dc8cba9 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/facet.yaml @@ -0,0 +1,56 @@ +# $schema: ../schema.json +name: facet +link: 'https://www.mongodb.com/docs/atlas/atlas-search/facet/' +type: + - searchOperator # should be searchCollector +encode: object +description: | + The facet collector groups results by values or ranges in the specified + faceted fields and returns the count for each of those groups. +arguments: + - + name: facets + type: + - object # map of facetDefinition + - + name: operator + optional: true + type: + - searchOperator +tests: + - + name: 'Facet' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/facet/#examples' + pipeline: + - + $search: + facet: + operator: + near: + path: 'released' + origin: !bson_utcdatetime '1999-07-01T00:00:00.000+00:00' + pivot: 7776000000 + facets: + genresFacet: + type: 'string' + path: 'genres' + - + $limit: 2 + - + $facet: + docs: + - + $project: + title: 1 + released: 1 + meta: + - + $replaceWith: '$$SEARCH_META' + - + $limit: 1 + - + $set: + meta: + $arrayElemAt: + - '$meta' + - 0 diff --git a/etc/gen_atlas_search/yaml/search/geoShape.yaml b/etc/gen_atlas_search/yaml/search/geoShape.yaml new file mode 100644 index 000000000..4da121e45 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/geoShape.yaml @@ -0,0 +1,123 @@ +# $schema: ../schema.json +name: geoShape +link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoShape/' +type: + - searchOperator +encode: object +description: | + The geoShape operator supports querying shapes with a relation to a given + geometry if indexShapes is set to true in the index definition. +arguments: + - + name: path + type: + - searchPath + - + name: relation + type: + - string # contains | disjoint | intersects | within + - + name: geometry + type: + - geometry + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Disjoint' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoShape/#disjoint-example' + pipeline: + - + $search: + geoShape: + relation: 'disjoint' + geometry: + type: 'Polygon' + coordinates: + - + - [-161.323242, 22.512557] + - [-152.446289, 22.065278] + - [-156.09375, 17.811456] + - [-161.323242, 22.512557] + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + score: + $meta: 'searchScore' + + - + name: 'Intersect' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoShape/#intersects-example' + pipeline: + - + $search: + geoShape: + relation: 'intersects' + geometry: + type: 'MultiPolygon' + coordinates: + - + - + - [2.16942, 41.40082] + - [2.17963, 41.40087] + - [2.18146, 41.39716] + - [2.15533, 41.40686] + - [2.14596, 41.38475] + - [2.17519, 41.41035] + - [2.16942, 41.40082] + - + - + - [2.16365, 41.39416] + - [2.16963, 41.39726] + - [2.15395, 41.38005] + - [2.17935, 41.43038] + - [2.16365, 41.39416] + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + score: + $meta: 'searchScore' + + - + name: 'Within' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoShape/#within-example' + pipeline: + - + $search: + geoShape: + relation: 'within' + geometry: + type: 'Polygon' + coordinates: + - + - [-74.3994140625, 40.5305017757] + - [-74.7290039063, 40.5805846641] + - [-74.7729492188, 40.9467136651] + - [-74.0698242188, 41.1290213475] + - [-73.65234375, 40.9964840144] + - [-72.6416015625, 40.9467136651] + - [-72.3559570313, 40.7971774152] + - [-74.3994140625, 40.5305017757] + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + score: + $meta: 'searchScore' diff --git a/etc/gen_atlas_search/yaml/search/geoWithin.yaml b/etc/gen_atlas_search/yaml/search/geoWithin.yaml new file mode 100644 index 000000000..1739f1997 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/geoWithin.yaml @@ -0,0 +1,103 @@ +# $schema: ../schema.json +name: geoWithin +link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/' +type: + - searchOperator +encode: object +description: | + The geoWithin operator supports querying geographic points within a given + geometry. Only points are returned, even if indexShapes value is true in + the index definition. +arguments: + - + name: path + type: + - searchPath + - + name: box + optional: true + type: + - object + - + name: circle + optional: true + type: + - object + - + name: geometry + optional: true + type: + - geometry + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'box' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/#box-example' + pipeline: + - + $search: + geoWithin: + path: 'address.location' + box: + bottomLeft: + type: 'Point' + coordinates: [112.467, -55.05] + topRight: + type: 'Point' + coordinates: [168, -9.133] + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + + - + name: 'circle' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/#circle-example' + pipeline: + - + $search: + geoWithin: + circle: + center: + type: 'Point' + coordinates: [-73.54, 45.54] + radius: 1600 + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + + - + name: 'geometry' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/#geometry-examples' + pipeline: + - + $search: + geoWithin: + geometry: + type: 'Polygon' + coordinates: + - + - [-161.323242, 22.512557] + - [-152.446289, 22.065278] + - [-156.09375, 17.811456] + - [-161.323242, 22.512557] + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 diff --git a/etc/gen_atlas_search/yaml/search/in.yaml b/etc/gen_atlas_search/yaml/search/in.yaml new file mode 100644 index 000000000..cc1aa6c33 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/in.yaml @@ -0,0 +1,89 @@ +# $schema: ../schema.json +name: in +link: 'https://www.mongodb.com/docs/atlas/atlas-search/in/' +type: + - searchOperator +encode: object +description: | + The in operator performs a search for an array of BSON values in a field. +arguments: + - + name: path + type: + - searchPath + - + name: value + type: + - any + - array # of any + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Single Value Field Match' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/in/#examples' + pipeline: + - + $search: + in: + path: 'birthdate' + value: + - !bson_utcdatetime '1977-03-02T02:20:31.000+00:00' + - !bson_utcdatetime '1977-03-01T00:00:00.000+00:00' + - !bson_utcdatetime '1977-05-06T21:57:35.000+00:00' + - + $project: + _id: 0 + name: 1 + birthdate: 1 + + - + name: 'Array Value Field Match' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/in/#examples' + pipeline: + - + $search: + in: + path: 'accounts' + value: + - 371138 + - 371139 + - 371140 + - + $project: + _id: 0 + name: 1 + accounts: 1 + + - + name: 'Compound Query Match' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/in/#examples' + pipeline: + - + $search: + compound: + must: + - + in: + path: 'name' + value: + - 'james sanchez' + - 'jennifer lawrence' + should: + - + in: + path: '_id' + value: + - !bson_objectId '5ca4bbcea2dd94ee58162a72' + - !bson_objectId '5ca4bbcea2dd94ee58162a91' + - + $limit: 5 + - + $project: + _id: 1 + name: 1 + score: + $meta: 'searchScore' diff --git a/etc/gen_atlas_search/yaml/search/moreLikeThis.yaml b/etc/gen_atlas_search/yaml/search/moreLikeThis.yaml new file mode 100644 index 000000000..8c4803bdd --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/moreLikeThis.yaml @@ -0,0 +1,99 @@ +# $schema: ../schema.json +name: moreLikeThis +link: 'https://www.mongodb.com/docs/atlas/atlas-search/moreLikeThis/' +type: + - searchOperator +encode: object +description: | + The moreLikeThis operator returns documents similar to input documents. + The moreLikeThis operator allows you to build features for your applications + that display similar or alternative results based on one or more given documents. +arguments: + - + name: like + type: + - object + - array # of object + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Single Document with Multiple Fields' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/morelikethis/#example-1--single-document-with-multiple-fields' + pipeline: + - + $search: + moreLikeThis: + like: + title: 'The Godfather' + genres: 'action' + - + $limit: 5 + - + $project: + _id: 0 + title: 1 + released: 1 + genres: 1 + + - + name: 'Input Document Excluded in Results' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/morelikethis/#example-2--input-document-excluded-in-results' + pipeline: + - + $search: + compound: + must: + - + moreLikeThis: + like: + _id: !bson_objectId '573a1396f29313caabce4a9a' + genres: + - 'Crime' + - 'Drama' + title: 'The Godfather' + mustNot: + - + equals: + path: '_id' + value: !bson_objectId '573a1396f29313caabce4a9a' + - + $limit: 5 + - + $project: + _id: 1 + title: 1 + released: 1 + genres: 1 + + - + name: 'Multiple Analyzers' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/morelikethis/#example-3--multiple-analyzers' + pipeline: + - + $search: + compound: + should: + - + moreLikeThis: + like: + _id: !bson_objectId '573a1396f29313caabce4a9a' + genres: + - 'Crime' + - 'Drama' + title: 'The Godfather' + mustNot: + - + equals: + path: '_id' + value: !bson_objectId '573a1394f29313caabcde9ef' + - + $limit: 10 + - + $project: + title: 1 + genres: 1 + _id: 1 diff --git a/etc/gen_atlas_search/yaml/search/near.yaml b/etc/gen_atlas_search/yaml/search/near.yaml new file mode 100644 index 000000000..bd4119cf9 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/near.yaml @@ -0,0 +1,124 @@ +# $schema: ../schema.json +name: near +link: 'https://www.mongodb.com/docs/atlas/atlas-search/near/' +type: + - searchOperator +encode: object +description: | + The near operator supports querying and scoring numeric, date, and GeoJSON point values. +arguments: + - + name: path + type: + - searchPath + - + name: origin + type: + - date + - number + - geometry + - + name: pivot + type: + - number + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Number' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/near/#number-example' + pipeline: + - + $search: + index: 'runtimes' + near: + path: 'runtime' + origin: 279 + pivot: 2 + - + $limit: 7 + - + $project: + _id: 0 + title: 1 + runtime: 1 + score: + $meta: 'searchScore' + + - + name: 'Date' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/near/#date-example' + pipeline: + - + $search: + index: 'releaseddate' + near: + path: 'released' + origin: !bson_utcdatetime '1915-09-13T00:00:00.000+00:00' + pivot: 7776000000 + - + $limit: 3 + - + $project: + _id: 0 + title: 1 + released: 1 + score: + $meta: 'searchScore' + + - + name: 'GeoJSON Point' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/near/#geojson-point-examples' + pipeline: + - + $search: + near: + origin: + type: 'Point' + coordinates: + - -8.61308 + - 41.1413 + pivot: 1000 + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + score: + $meta: 'searchScore' + + - + name: 'Compound' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/near/#compound-example' + pipeline: + - + $search: + compound: + must: + text: + query: 'Apartment' + path: 'property_type' + should: + near: + origin: + type: 'Point' + coordinates: + - 114.15027 + - 22.28158 + pivot: 1000 + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + property_type: 1 + address: 1 + score: + $meta: 'searchScore' diff --git a/etc/gen_atlas_search/yaml/search/phrase.yaml b/etc/gen_atlas_search/yaml/search/phrase.yaml new file mode 100644 index 000000000..4d9b75c4e --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/phrase.yaml @@ -0,0 +1,109 @@ +# $schema: ../schema.json +name: phrase +link: 'https://www.mongodb.com/docs/atlas/atlas-search/phrase/' +type: + - searchOperator +encode: object +description: | + The phrase operator performs search for documents containing an ordered sequence of terms using the analyzer specified in the index configuration. +arguments: + - + name: path + type: + - searchPath + - + name: query + type: + - string + - array # of string + - + name: slop + optional: true + type: + - int + - + name: synonyms + optional: true + type: + - string + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Single Phrase' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/phrase/#single-phrase-example' + pipeline: + - + $search: + phrase: + path: 'title' + query: 'new york' + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Multiple Phrase' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/phrase/#multiple-phrases-example' + pipeline: + - + $search: + phrase: + path: 'title' + query: + - 'the man' + - 'the moon' + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Phrase Slop' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/phrase/#slop-example' + pipeline: + - + $search: + phrase: + path: 'title' + query: 'men women' + slop: 5 + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Phrase Synonyms' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/phrase/#synonyms-example' + pipeline: + - + $search: + phrase: + path: 'plot' + query: 'automobile race' + slop: 5 + synonyms: 'my_synonyms' + - + $limit: 5 + - + $project: + _id: 0 + plot: 1 + title: 1 + score: + $meta: 'searchScore' diff --git a/etc/gen_atlas_search/yaml/search/queryString.yaml b/etc/gen_atlas_search/yaml/search/queryString.yaml new file mode 100644 index 000000000..8202771c9 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/queryString.yaml @@ -0,0 +1,35 @@ +# $schema: ../schema.json +name: queryString +link: 'https://www.mongodb.com/docs/atlas/atlas-search/queryString/' +type: + - searchOperator +encode: object +description: | + +arguments: + - + name: defaultPath + type: + - searchPath + - + name: query + type: + - string + +# The various example from the doc are variations of the "query" parameter +# this is not pertinent for testing the aggregation builder, unless we create +# a queryString builder. +tests: + - + name: 'Boolean Operator Queries' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/queryString/#boolean-operator-queries' + pipeline: + - + $search: + queryString: + defaultPath: 'title' + query: 'Rocky AND (IV OR 4 OR Four)' + - + $project: + _id: 0 + title: 1 diff --git a/etc/gen_atlas_search/yaml/search/range.yaml b/etc/gen_atlas_search/yaml/search/range.yaml new file mode 100644 index 000000000..f42c69176 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/range.yaml @@ -0,0 +1,139 @@ +# $schema: ../schema.json +name: range +link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/' +type: + - searchOperator +encode: object +description: | + The range operator supports querying and scoring numeric, date, and string values. + You can use this operator to find results that are within a given numeric, date, objectId, or letter (from the English alphabet) range. +arguments: + - + name: path + type: + - searchPath + - + name: gt + optional: true + type: + - date + - number + - string + - objectId + - + name: gte + optional: true + type: + - date + - number + - string + - objectId + - + name: lt + optional: true + type: + - date + - number + - string + - objectId + - + name: lte + optional: true + type: + - date + - number + - string + - objectId + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Number gte lte' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/#number-example' + pipeline: + - + $search: + range: + path: 'runtime' + gte: 2 + lte: 3 + - + $limit: 5 + - + $project: + _id: 0 + title: 1 + runtime: 1 + + - + name: 'Number lte' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/#number-example' + pipeline: + - + $search: + range: + path: 'runtime' + lte: 2 + - + $limit: 5 + - + $project: + _id: 0 + title: 1 + runtime: 1 + score: + $meta: 'searchScore' + + - + name: 'Date' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/#date-example' + pipeline: + - + $search: + range: + path: 'released' + gt: !bson_utcdatetime '2010-01-01T00:00:00.000Z' + lt: !bson_utcdatetime '2015-01-01T00:00:00.000Z' + - + $limit: 5 + - + $project: + _id: 0 + title: 1 + released: 1 + + - + name: 'ObjectId' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/#objectid-example' + pipeline: + - + $search: + range: + path: '_id' + gte: !bson_objectId '573a1396f29313caabce4a9a' + lte: !bson_objectId '573a1396f29313caabce4ae7' + - + $project: + _id: 1 + title: 1 + released: 1 + + - + name: 'String' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/#string-example' + pipeline: + - + $search: + range: + path: 'title' + gt: 'city' + lt: 'country' + - + $limit: 5 + - + $project: + _id: 0 + title: 1 diff --git a/etc/gen_atlas_search/yaml/search/regex.yaml b/etc/gen_atlas_search/yaml/search/regex.yaml new file mode 100644 index 000000000..869ffabde --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/regex.yaml @@ -0,0 +1,42 @@ +# $schema: ../schema.json +name: regex +link: 'https://www.mongodb.com/docs/atlas/atlas-search/regex/' +type: + - searchOperator +encode: object +description: | + regex interprets the query field as a regular expression. + regex is a term-level operator, meaning that the query field isn't analyzed. +arguments: + - + name: path + type: + - searchPath + - + name: query + type: + - string + - + name: allowAnalyzedField + optional: true + type: + - bool + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Regex' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/regex/#examples' + pipeline: + - + $search: + regex: + path: 'title' + query: '[0-9]{2} (.){4}s' + - + $project: + _id: 0 + title: 1 diff --git a/etc/gen_atlas_search/yaml/search/text.yaml b/etc/gen_atlas_search/yaml/search/text.yaml new file mode 100644 index 000000000..dbd48cdd0 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/text.yaml @@ -0,0 +1,194 @@ +# $schema: ../schema.json +name: text +link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/' +type: + - searchOperator +encode: object +description: | + The text operator performs a full-text search using the analyzer that you specify in the index configuration. + If you omit an analyzer, the text operator uses the default standard analyzer. +arguments: + - + name: path + type: + - searchPath + - + name: query + type: + - string + - + name: fuzzy + optional: true + type: + - object + - + name: matchCriteria + optional: true + type: + - string # "any" | "all" + - + name: synonyms + optional: true + type: + - string + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Basic' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#basic-example' + pipeline: + - + $search: + text: + path: 'title' + query: 'surfer' + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + - + name: 'Fuzzy Default' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#fuzzy-examples' + pipeline: + - + $search: + text: + path: 'title' + query: 'naw yark' + fuzzy: {} + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Fuzzy maxExpansions' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#fuzzy-examples' + pipeline: + - + $search: + text: + path: 'title' + query: 'naw yark' + fuzzy: + maxEdits: 1 + maxExpansions: 100 + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Fuzzy prefixLength' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#fuzzy-examples' + pipeline: + - + $search: + text: + path: 'title' + query: 'naw yark' + fuzzy: + maxEdits: 1 + prefixLength: 2 + - + $limit: 8 + - + $project: + _id: 1 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Match any Using equivalent Mapping' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#match-any-using-equivalent-mapping' + pipeline: + - + $search: + text: + path: 'plot' + query: 'attire' + synonyms: 'my_synonyms' + matchCriteria: 'any' + - + $limit: 5 + - + $project: + _id: 0 + plot: 1 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Match any Using explicit Mapping' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#match-any-using-explicit-mapping' + pipeline: + - + $search: + text: + path: 'plot' + query: 'boat race' + synonyms: 'my_synonyms' + matchCriteria: 'any' + - + $limit: 10 + - + $project: + _id: 0 + plot: 1 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Match all Using Synonyms' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#match-all-using-synonyms' + pipeline: + - + $search: + text: + path: 'plot' + query: 'automobile race' + matchCriteria: 'all' + synonyms: 'my_synonyms' + - + $limit: 20 + - + $project: + _id: 0 + plot: 1 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Wildcard Path' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/' + pipeline: + - + $search: + text: + path: + wildcard: '*' + query: 'surfer' + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' diff --git a/etc/gen_atlas_search/yaml/search/wildcard.yaml b/etc/gen_atlas_search/yaml/search/wildcard.yaml new file mode 100644 index 000000000..d17fb4803 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/wildcard.yaml @@ -0,0 +1,60 @@ +# $schema: ../schema.json +name: wildcard +link: 'https://www.mongodb.com/docs/atlas/atlas-search/wildcard/' +type: + - searchOperator +encode: object +description: | + The wildcard operator enables queries which use special characters in the search string that can match any character. +arguments: + - + name: path + type: + - searchPath + - + name: query + type: + - string + - + name: allowAnalyzedField + optional: true + type: + - bool + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Wildcard Path' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/wildcard/#index-definition' + pipeline: + - + $search: + wildcard: + query: 'Wom?n *' + path: + wildcard: '*' + - + $limit: 5 + - + $project: + _id: 0 + title: 1 + + - + name: 'Escape Character Example' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/wildcard/#escape-character-example' + pipeline: + - + $search: + wildcard: + query: '*\?' + path: 'title' + - + $limit: 5 + - + $project: + _id: 0 + title: 1 From 1336addf3e2c78b04df8b190145cea5c483aad83 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Thu, 14 Aug 2025 14:20:36 +0100 Subject: [PATCH 03/15] substantial codegen --- etc/gen_atlas_search/Cargo.lock | 30 ++++++++ etc/gen_atlas_search/Cargo.toml | 5 ++ etc/gen_atlas_search/src/main.rs | 128 ++++++++++++++++++++++++++++++- 3 files changed, 161 insertions(+), 2 deletions(-) diff --git a/etc/gen_atlas_search/Cargo.lock b/etc/gen_atlas_search/Cargo.lock index 4d93cdd47..f3356e5cd 100644 --- a/etc/gen_atlas_search/Cargo.lock +++ b/etc/gen_atlas_search/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "convert_case" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baaaa0ecca5b51987b9423ccdc971514dd8b0bb7b4060b983d3664dad3f1f89f" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -12,8 +21,13 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" name = "gen_atlas_search" version = "0.1.0" dependencies = [ + "convert_case", + "prettyplease", + "proc-macro2", + "quote", "serde", "serde_yaml", + "syn", ] [[package]] @@ -38,6 +52,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "prettyplease" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.97" @@ -112,6 +136,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unsafe-libyaml" version = "0.2.11" diff --git a/etc/gen_atlas_search/Cargo.toml b/etc/gen_atlas_search/Cargo.toml index c7a240688..0f504c7db 100644 --- a/etc/gen_atlas_search/Cargo.toml +++ b/etc/gen_atlas_search/Cargo.toml @@ -4,5 +4,10 @@ version = "0.1.0" edition = "2024" [dependencies] +convert_case = "0.8.0" +prettyplease = "0.2.36" +proc-macro2 = "1.0.97" +quote = "1.0.40" serde = { version = "1.0.219", features = ["derive"] } serde_yaml = "0.9.34" +syn = { version = "2.0.105", features = ["full", "printing"] } diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs index ece2285e3..a2cad9da4 100644 --- a/etc/gen_atlas_search/src/main.rs +++ b/etc/gen_atlas_search/src/main.rs @@ -1,13 +1,30 @@ +use convert_case::{Case, Casing}; +use proc_macro2::TokenStream; +use quote::format_ident; use serde::Deserialize; +use syn::parse_quote; #[derive(Debug, Deserialize)] -#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] struct Operator { name: String, + #[expect(dead_code)] link: String, #[serde(rename = "type")] + #[expect(dead_code)] type_: Vec, + #[expect(dead_code)] encode: EncodeType, + description: String, + arguments: Vec, + tests: Vec, +} + +impl Operator { + fn clear_tests(mut self) -> Self { + self.tests.clear(); + self + } } #[derive(Debug, Deserialize)] @@ -22,6 +39,113 @@ enum EncodeType { Object, } +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +struct Argument { + name: String, + #[serde(default)] + optional: Option, + #[serde(rename = "type")] + type_: Vec, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +enum ArgumentType { + String, + Object, + SearchScore, + SearchPath, +} + +impl Argument { + fn type_(&self) -> syn::Type { + if self.type_.len() != 1 { + panic!("Unexpected argument types: {:?}", self.type_); + } + match &self.type_[0] { + ArgumentType::String => parse_quote! { impl AsRef }, + ArgumentType::Object => parse_quote! { Document }, + ArgumentType::SearchScore => parse_quote! { Document }, + ArgumentType::SearchPath => parse_quote! { impl StringOrArray }, + } + } + + fn bson_expr(&self, ident: &syn::Ident) -> syn::Expr { + if self.type_.len() != 1 { + panic!("Unexpected argument types: {:?}", self.type_); + } + match &self.type_[0] { + ArgumentType::String => parse_quote! { #ident.as_ref() }, + ArgumentType::SearchPath => parse_quote! { #ident.to_bson() }, + _ => parse_quote! { #ident }, + } + } +} + +trait TokenStreamExt { + fn push(&mut self, other: TokenStream); +} + +impl TokenStreamExt for TokenStream { + fn push(&mut self, other: TokenStream) { + self.extend(other); + } +} + +fn gen_from_yaml(p: impl AsRef) -> String { + let contents = std::fs::read_to_string(p).unwrap(); + let parsed = serde_yaml::from_str::(&contents) + .unwrap() + .clear_tests(); + + let name_text = parsed.name; + let name_ident = format_ident!("{}", name_text.to_case(Case::Pascal)); + + let mut required_args = TokenStream::new(); + let mut init_doc = TokenStream::new(); + let mut setters = TokenStream::new(); + + for arg in parsed.arguments { + let ident = format_ident!("{}", arg.name.to_case(Case::Snake)); + let type_ = arg.type_(); + let arg_name = &arg.name; + let init_expr = arg.bson_expr(&ident); + + if arg.optional.unwrap_or(false) { + setters.push(parse_quote! { + pub fn #ident(mut self, #ident: #type_) -> Self { + self.stage.insert(#arg_name, #init_expr); + self + } + }); + } else { + required_args.push(parse_quote! { #ident : #type_, }); + init_doc.push(parse_quote! { #arg_name : #init_expr, }); + } + } + + let desc = parsed.description; + let output: syn::File = parse_quote! { + pub struct #name_ident; + + impl AtlasSearch<#name_ident> { + #[doc = #desc] + pub fn #name_ident(#required_args) -> Self { + Autocomplete { + name: #name_text, + stage: doc! { #init_doc }, + _t: PhantomData::default(), + } + } + #setters + } + }; + + prettyplease::unparse(&output) +} + fn main() { - println!("Hello, world!"); + let text = gen_from_yaml("yaml/search/autocomplete.yaml"); + println!("{text}"); } From e22672d77b015c2639e09ef6eb477b3c27d27708 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Thu, 14 Aug 2025 14:58:21 +0100 Subject: [PATCH 04/15] plug codegen into module structure, add more docs --- etc/gen_atlas_search/src/main.rs | 9 +- etc/gen_atlas_search/update-yaml.sh | 2 +- src/action.rs | 1 - src/{action/search.rs => atlas_search.rs} | 147 +++++++--------------- src/atlas_search/gen.rs | 34 +++++ src/lib.rs | 1 + 6 files changed, 91 insertions(+), 103 deletions(-) rename src/{action/search.rs => atlas_search.rs} (62%) create mode 100644 src/atlas_search/gen.rs diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs index a2cad9da4..9afaeb98f 100644 --- a/etc/gen_atlas_search/src/main.rs +++ b/etc/gen_atlas_search/src/main.rs @@ -101,6 +101,7 @@ fn gen_from_yaml(p: impl AsRef) -> String { let name_text = parsed.name; let name_ident = format_ident!("{}", name_text.to_case(Case::Pascal)); + let constr_ident = format_ident!("{}", name_text.to_case(Case::Snake)); let mut required_args = TokenStream::new(); let mut init_doc = TokenStream::new(); @@ -114,6 +115,7 @@ fn gen_from_yaml(p: impl AsRef) -> String { if arg.optional.unwrap_or(false) { setters.push(parse_quote! { + #[allow(missing_docs)] pub fn #ident(mut self, #ident: #type_) -> Self { self.stage.insert(#arg_name, #init_expr); self @@ -127,12 +129,15 @@ fn gen_from_yaml(p: impl AsRef) -> String { let desc = parsed.description; let output: syn::File = parse_quote! { + use super::*; + + #[allow(missing_docs)] pub struct #name_ident; impl AtlasSearch<#name_ident> { #[doc = #desc] - pub fn #name_ident(#required_args) -> Self { - Autocomplete { + pub fn #constr_ident(#required_args) -> Self { + AtlasSearch { name: #name_text, stage: doc! { #init_doc }, _t: PhantomData::default(), diff --git a/etc/gen_atlas_search/update-yaml.sh b/etc/gen_atlas_search/update-yaml.sh index c2174721b..dbec840cb 100755 --- a/etc/gen_atlas_search/update-yaml.sh +++ b/etc/gen_atlas_search/update-yaml.sh @@ -14,4 +14,4 @@ cd "${basedir}" mkdir -p "yaml/search" rsync -ah "${tmpdir}/generator/config/search/" "yaml/search" --delete -#rm -rf "${tmpdir}" \ No newline at end of file +rm -rf "${tmpdir}" \ No newline at end of file diff --git a/src/action.rs b/src/action.rs index a5ed56b04..91ebc6fd4 100644 --- a/src/action.rs +++ b/src/action.rs @@ -23,7 +23,6 @@ mod list_indexes; mod perf; mod replace_one; mod run_command; -mod search; mod search_index; mod session; mod shutdown; diff --git a/src/action/search.rs b/src/atlas_search.rs similarity index 62% rename from src/action/search.rs rename to src/atlas_search.rs index f38f02630..85bb798d3 100644 --- a/src/action/search.rs +++ b/src/atlas_search.rs @@ -1,7 +1,37 @@ +//! Helpers for building Atlas Search aggregation pipelines. + +mod gen; + +pub use gen::*; + use std::marker::PhantomData; use crate::bson::{doc, Bson, Document}; +/// A helper to build the aggregation stage for Atlas Search. Use one of the constructor functions +/// and chain optional value setters, and then convert to a pipeline stage [`Document`] via +/// [`into`](Into::into) or [`on_index`](AtlasSearch::on_index). +/// +/// ```no_run +/// # async fn wrapper() -> mongodb::error::Error { +/// # use mongodb::{Collection, bson::{Document, doc}}; +/// # let collection: Collection = todo!() +/// let cursor = coll.aggregate(vec![ +/// AtlasSearch::autocomplete("pre", "title") +/// .fuzzy(doc! { "maxEdits": 1, "prefixLength": 1, "maxExpansions": 256 }) +/// .into(), +/// doc! { +/// "$limit": 10, +/// }, +/// doc! { +/// "$project": { +/// "_id": 0, +/// "title": 1, +/// } +/// }, +/// ]).await?; +/// # Ok(()) +/// # } pub struct AtlasSearch { name: &'static str, stage: Document, @@ -18,12 +48,12 @@ impl Into for AtlasSearch { } } -pub struct Autocomplete; -pub struct Compound; -pub struct Text; +#[allow(missing_docs)] pub struct Built; impl AtlasSearch { + /// Finalize this builder. Not typically needed, but can be useful to include builders of + /// different types in a single `Vec`. pub fn build(self) -> AtlasSearch { AtlasSearch { name: self.name, @@ -32,13 +62,15 @@ impl AtlasSearch { } } + /// Like [`into`](Into::into), converts this builder into an aggregate pipeline stage + /// [`Document`], but also specify the search index to use. pub fn on_index(self, index: impl AsRef) -> Document { - let mut out: Document = self.into(); - // unwrap safety: AtlasSearch::into always produces a "$search" value - out.get_document_mut("$search") - .unwrap() - .insert("index", index.as_ref()); - out + doc! { + "$search": { + "index": index.as_ref(), + self.name: self.stage, + } + } } } @@ -52,39 +84,6 @@ impl IntoIterator for AtlasSearch { } } -impl AtlasSearch { - /// Perform a search for a word or phrase that contains a sequence of characters from an - /// incomplete input string. - pub fn autocomplete(query: impl StringOrArray, path: impl AsRef) -> Self { - AtlasSearch { - name: "autocomplete", - stage: doc! { - "query": query.to_bson(), - "path": path.as_ref(), - }, - _t: PhantomData::default(), - } - } - - /// Enable fuzzy search. Find strings which are similar to the search term or terms. - pub fn fuzzy(mut self, options: Document) -> Self { - self.stage.insert("fuzzy", options); - self - } - - /// Score to assign to the matching search term results. - pub fn score(mut self, options: Document) -> Self { - self.stage.insert("score", options); - self - } - - /// Order in which to search for tokens. - pub fn token_order(mut self, order: TokenOrder) -> Self { - self.stage.insert("tokenOrder", order.name()); - self - } -} - /// Order in which to search for tokens. pub enum TokenOrder { /// Indicates tokens in the query can appear in any order in the documents. @@ -103,65 +102,11 @@ impl TokenOrder { } } -impl AtlasSearch { - pub fn compound() -> Self { - AtlasSearch { - name: "compound", - stage: doc! {}, - _t: PhantomData::default(), - } - } - - pub fn must(mut self, clauses: impl IntoIterator>) -> Self { - self.stage.insert( - "must", - clauses.into_iter().map(|sq| sq.stage).collect::>(), - ); - self - } - - pub fn must_not(mut self, clauses: impl IntoIterator>) -> Self { - self.stage.insert( - "mustNot", - clauses.into_iter().map(|sq| sq.stage).collect::>(), - ); - self - } - - pub fn should(mut self, clauses: impl IntoIterator>) -> Self { - self.stage.insert( - "should", - clauses.into_iter().map(|sq| sq.stage).collect::>(), - ); - self - } -} - -impl AtlasSearch { - pub fn text(query: impl StringOrArray, path: impl StringOrArray) -> Self { - AtlasSearch { - name: "text", - stage: doc! { - "query": query.to_bson(), - "path": path.to_bson(), - }, - _t: PhantomData::default(), - } - } - - pub fn fuzzy(mut self, options: Document) -> Self { - self.stage.insert("fuzzy", options); - self - } - - pub fn match_criteria(mut self, criteria: MatchCriteria) -> Self { - self.stage.insert("matchCriteria", criteria.name()); - self - } -} - +/// Criteria to use to match the terms in the query. pub enum MatchCriteria { + /// Return documents that contain any of the terms from the query field. Any, + /// Only return documents that contain all of the terms from the query field. All, } @@ -174,7 +119,9 @@ impl MatchCriteria { } } +/// An Atlas Search operator parameter that can be either a string or array of strings. pub trait StringOrArray { + #[allow(missing_docs)] fn to_bson(self) -> Bson; } @@ -260,6 +207,7 @@ impl StringOrArray for Vec<&String> { } } +/* #[test] fn api_flow() { let coll: crate::Collection = todo!(); @@ -303,3 +251,4 @@ fn api_flow() { ]); } } +*/ diff --git a/src/atlas_search/gen.rs b/src/atlas_search/gen.rs new file mode 100644 index 000000000..2482204cc --- /dev/null +++ b/src/atlas_search/gen.rs @@ -0,0 +1,34 @@ +use super::*; +#[allow(missing_docs)] +pub struct Autocomplete; +impl AtlasSearch { + /**The autocomplete operator performs a search for a word or phrase that + contains a sequence of characters from an incomplete input string. The + fields that you intend to query with the autocomplete operator must be + indexed with the autocomplete data type in the collection's index definition. + */ + pub fn autocomplete(path: impl StringOrArray, query: impl AsRef) -> Self { + AtlasSearch { + name: "autocomplete", + stage: doc! { + "path" : path.to_bson(), "query" : query.as_ref(), + }, + _t: PhantomData::default(), + } + } + #[allow(missing_docs)] + pub fn token_order(mut self, token_order: impl AsRef) -> Self { + self.stage.insert("tokenOrder", token_order.as_ref()); + self + } + #[allow(missing_docs)] + pub fn fuzzy(mut self, fuzzy: Document) -> Self { + self.stage.insert("fuzzy", fuzzy); + self + } + #[allow(missing_docs)] + pub fn score(mut self, score: Document) -> Self { + self.stage.insert("score", score); + self + } +} diff --git a/src/lib.rs b/src/lib.rs index 7f0ab39e3..d41b2da1c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,7 @@ pub mod options; pub use ::mongocrypt; pub mod action; +pub mod atlas_search; pub(crate) mod bson_compat; mod bson_util; pub mod change_stream; From 8d033e149914725921d0830d08b189a742ba06ad Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 15 Aug 2025 11:09:53 +0100 Subject: [PATCH 05/15] defer stringification --- etc/gen_atlas_search/src/main.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs index 9afaeb98f..2c4b285db 100644 --- a/etc/gen_atlas_search/src/main.rs +++ b/etc/gen_atlas_search/src/main.rs @@ -93,7 +93,7 @@ impl TokenStreamExt for TokenStream { } } -fn gen_from_yaml(p: impl AsRef) -> String { +fn gen_from_yaml(p: impl AsRef) -> TokenStream { let contents = std::fs::read_to_string(p).unwrap(); let parsed = serde_yaml::from_str::(&contents) .unwrap() @@ -128,9 +128,7 @@ fn gen_from_yaml(p: impl AsRef) -> String { } let desc = parsed.description; - let output: syn::File = parse_quote! { - use super::*; - + parse_quote! { #[allow(missing_docs)] pub struct #name_ident; @@ -145,12 +143,20 @@ fn gen_from_yaml(p: impl AsRef) -> String { } #setters } - }; - - prettyplease::unparse(&output) + } } fn main() { - let text = gen_from_yaml("yaml/search/autocomplete.yaml"); + let mut operators = TokenStream::new(); + for path in ["yaml/search/autocomplete.yaml"] { + operators.push(gen_from_yaml(path)); + } + + let file = parse_quote! { + use super::*; + + #operators + }; + let text = prettyplease::unparse(&file); println!("{text}"); } From d92cca6994143747f69eb5da3f259b9164e66fbf Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 15 Aug 2025 11:25:00 +0100 Subject: [PATCH 06/15] TokenOrder --- etc/gen_atlas_search/src/main.rs | 11 +++++++++++ src/atlas_search.rs | 6 +++++- src/atlas_search/gen.rs | 5 +++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs index 2c4b285db..150069682 100644 --- a/etc/gen_atlas_search/src/main.rs +++ b/etc/gen_atlas_search/src/main.rs @@ -58,8 +58,13 @@ enum ArgumentType { SearchPath, } +static TOKEN_ORDER: &str = "tokenOrder"; + impl Argument { fn type_(&self) -> syn::Type { + if self.name == TOKEN_ORDER { + return parse_quote! { TokenOrder }; + } if self.type_.len() != 1 { panic!("Unexpected argument types: {:?}", self.type_); } @@ -72,6 +77,9 @@ impl Argument { } fn bson_expr(&self, ident: &syn::Ident) -> syn::Expr { + if self.name == TOKEN_ORDER { + return parse_quote! { #ident.name() }; + } if self.type_.len() != 1 { panic!("Unexpected argument types: {:?}", self.type_); } @@ -83,6 +91,8 @@ impl Argument { } } +// Type inference helper: TokenStream impls Extend for both TokenTree and TokenStream, so calling +// `stream.extend(parse_quote! { blah })` is ambiguous, where `stream.push(...)` is not. trait TokenStreamExt { fn push(&mut self, other: TokenStream); } @@ -153,6 +163,7 @@ fn main() { } let file = parse_quote! { + //! This file was autogenerated. Do not manually edit. use super::*; #operators diff --git a/src/atlas_search.rs b/src/atlas_search.rs index 85bb798d3..0567cdc2e 100644 --- a/src/atlas_search.rs +++ b/src/atlas_search.rs @@ -85,19 +85,23 @@ impl IntoIterator for AtlasSearch { } /// Order in which to search for tokens. +#[derive(Clone, PartialEq)] pub enum TokenOrder { /// Indicates tokens in the query can appear in any order in the documents. Any, /// Indicates tokens in the query must appear adjacent to each other or in the order specified /// in the query in the documents. Sequential, + /// Fallback for future compatibility. + Other(String), } impl TokenOrder { - fn name(&self) -> &'static str { + fn name(&self) -> &str { match self { Self::Any => "any", Self::Sequential => "sequential", + Self::Other(s) => s.as_str(), } } } diff --git a/src/atlas_search/gen.rs b/src/atlas_search/gen.rs index 2482204cc..cf5a8e05a 100644 --- a/src/atlas_search/gen.rs +++ b/src/atlas_search/gen.rs @@ -1,3 +1,4 @@ +//! This file was autogenerated. Do not manually edit. use super::*; #[allow(missing_docs)] pub struct Autocomplete; @@ -17,8 +18,8 @@ impl AtlasSearch { } } #[allow(missing_docs)] - pub fn token_order(mut self, token_order: impl AsRef) -> Self { - self.stage.insert("tokenOrder", token_order.as_ref()); + pub fn token_order(mut self, token_order: TokenOrder) -> Self { + self.stage.insert("tokenOrder", token_order.name()); self } #[allow(missing_docs)] From 635ae827fcd0f655a6da656cd82b96f0b196e696 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 15 Aug 2025 11:55:38 +0100 Subject: [PATCH 07/15] text and supporting changes --- etc/gen_atlas_search/regenerate.sh | 9 +++++ etc/gen_atlas_search/src/main.rs | 62 +++++++++++++++++++++--------- src/atlas_search.rs | 8 +++- src/atlas_search/gen.rs | 40 ++++++++++++++++++- 4 files changed, 96 insertions(+), 23 deletions(-) create mode 100755 etc/gen_atlas_search/regenerate.sh diff --git a/etc/gen_atlas_search/regenerate.sh b/etc/gen_atlas_search/regenerate.sh new file mode 100755 index 000000000..c1d4ae43d --- /dev/null +++ b/etc/gen_atlas_search/regenerate.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -o errexit +set -x + +gen_path="$(dirname $0)/../../src/atlas_search/gen.rs" + +cargo run > ${gen_path} +rustfmt +nightly --unstable-features ${gen_path} \ No newline at end of file diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs index 150069682..d96b2693a 100644 --- a/etc/gen_atlas_search/src/main.rs +++ b/etc/gen_atlas_search/src/main.rs @@ -58,35 +58,58 @@ enum ArgumentType { SearchPath, } +enum ArgumentRustType { + String, + Document, + StringOrArray, + TokenOrder, + MatchCriteria, +} + +static QUERY: &str = "query"; static TOKEN_ORDER: &str = "tokenOrder"; +static MATCH_CRITERIA: &str = "matchCriteria"; impl Argument { - fn type_(&self) -> syn::Type { + fn rust_type(&self) -> ArgumentRustType { + if self.name == QUERY { + return ArgumentRustType::StringOrArray; + } if self.name == TOKEN_ORDER { - return parse_quote! { TokenOrder }; + return ArgumentRustType::TokenOrder; + } + if self.name == MATCH_CRITERIA { + return ArgumentRustType::MatchCriteria; } if self.type_.len() != 1 { panic!("Unexpected argument types: {:?}", self.type_); } match &self.type_[0] { - ArgumentType::String => parse_quote! { impl AsRef }, - ArgumentType::Object => parse_quote! { Document }, - ArgumentType::SearchScore => parse_quote! { Document }, - ArgumentType::SearchPath => parse_quote! { impl StringOrArray }, + ArgumentType::String => ArgumentRustType::String, + ArgumentType::Object => ArgumentRustType::Document, + ArgumentType::SearchScore => ArgumentRustType::Document, + ArgumentType::SearchPath => ArgumentRustType::StringOrArray, } } +} - fn bson_expr(&self, ident: &syn::Ident) -> syn::Expr { - if self.name == TOKEN_ORDER { - return parse_quote! { #ident.name() }; +impl ArgumentRustType { + fn tokens(&self) -> syn::Type { + match self { + Self::String => parse_quote! { impl AsRef }, + Self::Document => parse_quote! { Document }, + Self::StringOrArray => parse_quote! { impl StringOrArray }, + Self::TokenOrder => parse_quote! { TokenOrder }, + Self::MatchCriteria => parse_quote! { MatchCriteria }, } - if self.type_.len() != 1 { - panic!("Unexpected argument types: {:?}", self.type_); - } - match &self.type_[0] { - ArgumentType::String => parse_quote! { #ident.as_ref() }, - ArgumentType::SearchPath => parse_quote! { #ident.to_bson() }, - _ => parse_quote! { #ident }, + } + + fn bson_expr(&self, ident: &syn::Ident) -> syn::Expr { + match self { + Self::String => parse_quote! { #ident.as_ref() }, + Self::StringOrArray => parse_quote! { #ident.to_bson() }, + Self::TokenOrder | Self::MatchCriteria => parse_quote! { #ident.name() }, + Self::Document => parse_quote! { #ident }, } } } @@ -119,9 +142,10 @@ fn gen_from_yaml(p: impl AsRef) -> TokenStream { for arg in parsed.arguments { let ident = format_ident!("{}", arg.name.to_case(Case::Snake)); - let type_ = arg.type_(); + let rust_type = arg.rust_type(); + let type_ = rust_type.tokens(); let arg_name = &arg.name; - let init_expr = arg.bson_expr(&ident); + let init_expr = rust_type.bson_expr(&ident); if arg.optional.unwrap_or(false) { setters.push(parse_quote! { @@ -158,7 +182,7 @@ fn gen_from_yaml(p: impl AsRef) -> TokenStream { fn main() { let mut operators = TokenStream::new(); - for path in ["yaml/search/autocomplete.yaml"] { + for path in ["yaml/search/autocomplete.yaml", "yaml/search/text.yaml"] { operators.push(gen_from_yaml(path)); } diff --git a/src/atlas_search.rs b/src/atlas_search.rs index 0567cdc2e..f44467ec0 100644 --- a/src/atlas_search.rs +++ b/src/atlas_search.rs @@ -85,7 +85,7 @@ impl IntoIterator for AtlasSearch { } /// Order in which to search for tokens. -#[derive(Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum TokenOrder { /// Indicates tokens in the query can appear in any order in the documents. Any, @@ -107,18 +107,22 @@ impl TokenOrder { } /// Criteria to use to match the terms in the query. +#[derive(Debug, Clone, PartialEq)] pub enum MatchCriteria { /// Return documents that contain any of the terms from the query field. Any, /// Only return documents that contain all of the terms from the query field. All, + /// Fallback for future compatibility. + Other(String), } impl MatchCriteria { - fn name(&self) -> &'static str { + fn name(&self) -> &str { match self { Self::Any => "any", Self::All => "all", + Self::Other(s) => s.as_str(), } } } diff --git a/src/atlas_search/gen.rs b/src/atlas_search/gen.rs index cf5a8e05a..f1a73c77e 100644 --- a/src/atlas_search/gen.rs +++ b/src/atlas_search/gen.rs @@ -8,11 +8,11 @@ impl AtlasSearch { fields that you intend to query with the autocomplete operator must be indexed with the autocomplete data type in the collection's index definition. */ - pub fn autocomplete(path: impl StringOrArray, query: impl AsRef) -> Self { + pub fn autocomplete(path: impl StringOrArray, query: impl StringOrArray) -> Self { AtlasSearch { name: "autocomplete", stage: doc! { - "path" : path.to_bson(), "query" : query.as_ref(), + "path" : path.to_bson(), "query" : query.to_bson(), }, _t: PhantomData::default(), } @@ -33,3 +33,39 @@ impl AtlasSearch { self } } +#[allow(missing_docs)] +pub struct Text; +impl AtlasSearch { + /**The text operator performs a full-text search using the analyzer that you specify in the index configuration. + If you omit an analyzer, the text operator uses the default standard analyzer. + */ + pub fn text(path: impl StringOrArray, query: impl StringOrArray) -> Self { + AtlasSearch { + name: "text", + stage: doc! { + "path" : path.to_bson(), "query" : query.to_bson(), + }, + _t: PhantomData::default(), + } + } + #[allow(missing_docs)] + pub fn fuzzy(mut self, fuzzy: Document) -> Self { + self.stage.insert("fuzzy", fuzzy); + self + } + #[allow(missing_docs)] + pub fn match_criteria(mut self, match_criteria: MatchCriteria) -> Self { + self.stage.insert("matchCriteria", match_criteria.name()); + self + } + #[allow(missing_docs)] + pub fn synonyms(mut self, synonyms: impl AsRef) -> Self { + self.stage.insert("synonyms", synonyms.as_ref()); + self + } + #[allow(missing_docs)] + pub fn score(mut self, score: Document) -> Self { + self.stage.insert("score", score); + self + } +} From ba5feb17e3a2b215adfddceed1f66b3c3fb47526 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 15 Aug 2025 12:30:36 +0100 Subject: [PATCH 08/15] compound and supporting changes --- etc/gen_atlas_search/src/main.rs | 44 ++++++++++++----- src/atlas_search.rs | 85 +++++++++++++++++--------------- src/atlas_search/gen.rs | 69 ++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 51 deletions(-) diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs index d96b2693a..38918aba4 100644 --- a/etc/gen_atlas_search/src/main.rs +++ b/etc/gen_atlas_search/src/main.rs @@ -56,6 +56,9 @@ enum ArgumentType { Object, SearchScore, SearchPath, + SearchOperator, + Array, + Int, } enum ArgumentRustType { @@ -64,6 +67,8 @@ enum ArgumentRustType { StringOrArray, TokenOrder, MatchCriteria, + Operator, + I32, } static QUERY: &str = "query"; @@ -81,14 +86,14 @@ impl Argument { if self.name == MATCH_CRITERIA { return ArgumentRustType::MatchCriteria; } - if self.type_.len() != 1 { - panic!("Unexpected argument types: {:?}", self.type_); - } - match &self.type_[0] { - ArgumentType::String => ArgumentRustType::String, - ArgumentType::Object => ArgumentRustType::Document, - ArgumentType::SearchScore => ArgumentRustType::Document, - ArgumentType::SearchPath => ArgumentRustType::StringOrArray, + match self.type_.as_slice() { + [ArgumentType::String] => ArgumentRustType::String, + [ArgumentType::Object] => ArgumentRustType::Document, + [ArgumentType::SearchScore] => ArgumentRustType::Document, + [ArgumentType::SearchPath] => ArgumentRustType::StringOrArray, + [ArgumentType::SearchOperator, ArgumentType::Array] => ArgumentRustType::Operator, + [ArgumentType::Int] => ArgumentRustType::I32, + _ => panic!("Unexpected argument types: {:?}", self.type_), } } } @@ -101,6 +106,15 @@ impl ArgumentRustType { Self::StringOrArray => parse_quote! { impl StringOrArray }, Self::TokenOrder => parse_quote! { TokenOrder }, Self::MatchCriteria => parse_quote! { MatchCriteria }, + Self::Operator => parse_quote! { impl IntoIterator> }, + Self::I32 => parse_quote! { i32 }, + } + } + + fn variables(&self) -> TokenStream { + match self { + Self::Operator => parse_quote! { T }, + _ => parse_quote! {}, } } @@ -109,7 +123,10 @@ impl ArgumentRustType { Self::String => parse_quote! { #ident.as_ref() }, Self::StringOrArray => parse_quote! { #ident.to_bson() }, Self::TokenOrder | Self::MatchCriteria => parse_quote! { #ident.name() }, - Self::Document => parse_quote! { #ident }, + Self::Document | Self::I32 => parse_quote! { #ident }, + Self::Operator => { + parse_quote! { #ident.into_iter().map(|s| s.into()).collect::>() } + } } } } @@ -148,9 +165,10 @@ fn gen_from_yaml(p: impl AsRef) -> TokenStream { let init_expr = rust_type.bson_expr(&ident); if arg.optional.unwrap_or(false) { + let tvars = rust_type.variables(); setters.push(parse_quote! { #[allow(missing_docs)] - pub fn #ident(mut self, #ident: #type_) -> Self { + pub fn #ident<#tvars>(mut self, #ident: #type_) -> Self { self.stage.insert(#arg_name, #init_expr); self } @@ -182,7 +200,11 @@ fn gen_from_yaml(p: impl AsRef) -> TokenStream { fn main() { let mut operators = TokenStream::new(); - for path in ["yaml/search/autocomplete.yaml", "yaml/search/text.yaml"] { + for path in [ + "yaml/search/autocomplete.yaml", + "yaml/search/text.yaml", + "yaml/search/compound.yaml", + ] { operators.push(gen_from_yaml(path)); } diff --git a/src/atlas_search.rs b/src/atlas_search.rs index f44467ec0..c45305b95 100644 --- a/src/atlas_search.rs +++ b/src/atlas_search.rs @@ -215,48 +215,53 @@ impl StringOrArray for Vec<&String> { } } -/* -#[test] -fn api_flow() { +#[tokio::test] +async fn api_flow() { + #[allow(unused_variables)] let coll: crate::Collection = todo!(); #[allow(unreachable_code)] { - let _ = coll.aggregate(vec![ - AtlasSearch::autocomplete("pre", "title") - .fuzzy(doc! { "maxEdits": 1, "prefixLength": 1, "maxExpansions": 256 }) - .into(), - doc! { - "$limit": 10, - }, - doc! { - "$project": { - "_id": 0, - "title": 1, - } - }, - ]); - let _ = coll.aggregate(vec![ - AtlasSearch::text("baseball", "plot").into(), - doc! { "$limit": 3 }, - doc! { - "$project": { - "_id": 0, - "title": 1, - "plot": 1, - } - }, - ]); - let _ = coll.aggregate(vec![ - AtlasSearch::compound() - .must(AtlasSearch::text("varieties", "description")) - .should(AtlasSearch::text("Fuji", "description")) - .into(), - doc! { - "$project": { - "score": { "$meta": "searchScore" } - } - }, - ]); + let _ = coll + .aggregate(vec![ + AtlasSearch::autocomplete("pre", "title") + .fuzzy(doc! { "maxEdits": 1, "prefixLength": 1, "maxExpansions": 256 }) + .into(), + doc! { + "$limit": 10, + }, + doc! { + "$project": { + "_id": 0, + "title": 1, + } + }, + ]) + .await; + let _ = coll + .aggregate(vec![ + AtlasSearch::text("baseball", "plot").into(), + doc! { "$limit": 3 }, + doc! { + "$project": { + "_id": 0, + "title": 1, + "plot": 1, + } + }, + ]) + .await; + let _ = coll + .aggregate(vec![ + AtlasSearch::compound() + .must(AtlasSearch::text("varieties", "description")) + .should(AtlasSearch::text("Fuji", "description")) + .into(), + doc! { + "$project": { + "score": { "$meta": "searchScore" } + } + }, + ]) + .await; } } -*/ diff --git a/src/atlas_search/gen.rs b/src/atlas_search/gen.rs index f1a73c77e..39cee31ba 100644 --- a/src/atlas_search/gen.rs +++ b/src/atlas_search/gen.rs @@ -69,3 +69,72 @@ impl AtlasSearch { self } } +#[allow(missing_docs)] +pub struct Compound; +impl AtlasSearch { + /**The compound operator combines two or more operators into a single query. + Each element of a compound query is called a clause, and each clause + consists of one or more sub-queries. + */ + pub fn compound() -> Self { + AtlasSearch { + name: "compound", + stage: doc! {}, + _t: PhantomData::default(), + } + } + #[allow(missing_docs)] + pub fn must(mut self, must: impl IntoIterator>) -> Self { + self.stage.insert( + "must", + must.into_iter() + .map(|s| s.into()) + .collect::>(), + ); + self + } + #[allow(missing_docs)] + pub fn must_not(mut self, must_not: impl IntoIterator>) -> Self { + self.stage.insert( + "mustNot", + must_not + .into_iter() + .map(|s| s.into()) + .collect::>(), + ); + self + } + #[allow(missing_docs)] + pub fn should(mut self, should: impl IntoIterator>) -> Self { + self.stage.insert( + "should", + should + .into_iter() + .map(|s| s.into()) + .collect::>(), + ); + self + } + #[allow(missing_docs)] + pub fn filter(mut self, filter: impl IntoIterator>) -> Self { + self.stage.insert( + "filter", + filter + .into_iter() + .map(|s| s.into()) + .collect::>(), + ); + self + } + #[allow(missing_docs)] + pub fn minimum_should_match(mut self, minimum_should_match: i32) -> Self { + self.stage + .insert("minimumShouldMatch", minimum_should_match); + self + } + #[allow(missing_docs)] + pub fn score(mut self, score: Document) -> Self { + self.stage.insert("score", score); + self + } +} From 2bdb6904bc6d06615bc8a00b2556b9c8e23fc3d6 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 15 Aug 2025 12:33:56 +0100 Subject: [PATCH 09/15] fix test --- src/atlas_search.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/atlas_search.rs b/src/atlas_search.rs index c45305b95..7e16ae570 100644 --- a/src/atlas_search.rs +++ b/src/atlas_search.rs @@ -217,10 +217,13 @@ impl StringOrArray for Vec<&String> { #[tokio::test] async fn api_flow() { - #[allow(unused_variables)] - let coll: crate::Collection = todo!(); + // This is currently intended as a testbed for how the API works, not as an actual test. + return; + #[allow(unreachable_code)] { + #[allow(unused_variables)] + let coll: crate::Collection = todo!(); let _ = coll .aggregate(vec![ AtlasSearch::autocomplete("pre", "title") From 825c6fa380a7988661445667ce7cb1fb6a6ef959 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 15 Aug 2025 15:17:57 +0100 Subject: [PATCH 10/15] fix doctest --- src/atlas_search.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/atlas_search.rs b/src/atlas_search.rs index 7e16ae570..dd393a28f 100644 --- a/src/atlas_search.rs +++ b/src/atlas_search.rs @@ -13,11 +13,11 @@ use crate::bson::{doc, Bson, Document}; /// [`into`](Into::into) or [`on_index`](AtlasSearch::on_index). /// /// ```no_run -/// # async fn wrapper() -> mongodb::error::Error { -/// # use mongodb::{Collection, bson::{Document, doc}}; -/// # let collection: Collection = todo!() -/// let cursor = coll.aggregate(vec![ -/// AtlasSearch::autocomplete("pre", "title") +/// # async fn wrapper() -> mongodb::error::Result<()> { +/// # use mongodb::{Collection, atlas_search::AtlasSearch, bson::{Document, doc}}; +/// # let collection: Collection = todo!(); +/// let cursor = collection.aggregate(vec![ +/// AtlasSearch::autocomplete("title", "pre") /// .fuzzy(doc! { "maxEdits": 1, "prefixLength": 1, "maxExpansions": 256 }) /// .into(), /// doc! { @@ -226,7 +226,7 @@ async fn api_flow() { let coll: crate::Collection = todo!(); let _ = coll .aggregate(vec![ - AtlasSearch::autocomplete("pre", "title") + AtlasSearch::autocomplete("title", "pre") .fuzzy(doc! { "maxEdits": 1, "prefixLength": 1, "maxExpansions": 256 }) .into(), doc! { @@ -242,7 +242,7 @@ async fn api_flow() { .await; let _ = coll .aggregate(vec![ - AtlasSearch::text("baseball", "plot").into(), + AtlasSearch::text("plot", "baseball").into(), doc! { "$limit": 3 }, doc! { "$project": { @@ -256,8 +256,8 @@ async fn api_flow() { let _ = coll .aggregate(vec![ AtlasSearch::compound() - .must(AtlasSearch::text("varieties", "description")) - .should(AtlasSearch::text("Fuji", "description")) + .must(AtlasSearch::text("description", "varieties")) + .should(AtlasSearch::text("description", "Fuji")) .into(), doc! { "$project": { From 97a6f52070ca8e3a884183284f9894c72f0e61db Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 15 Aug 2025 15:23:12 +0100 Subject: [PATCH 11/15] fix clippy --- etc/gen_atlas_search/src/main.rs | 4 ++-- src/atlas_search.rs | 6 +++--- src/atlas_search/gen.rs | 25 +++++++------------------ 3 files changed, 12 insertions(+), 23 deletions(-) diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs index 38918aba4..e9b06dd3d 100644 --- a/etc/gen_atlas_search/src/main.rs +++ b/etc/gen_atlas_search/src/main.rs @@ -125,7 +125,7 @@ impl ArgumentRustType { Self::TokenOrder | Self::MatchCriteria => parse_quote! { #ident.name() }, Self::Document | Self::I32 => parse_quote! { #ident }, Self::Operator => { - parse_quote! { #ident.into_iter().map(|s| s.into()).collect::>() } + parse_quote! { #ident.into_iter().map(Document::from).collect::>() } } } } @@ -190,7 +190,7 @@ fn gen_from_yaml(p: impl AsRef) -> TokenStream { AtlasSearch { name: #name_text, stage: doc! { #init_doc }, - _t: PhantomData::default(), + _t: PhantomData, } } #setters diff --git a/src/atlas_search.rs b/src/atlas_search.rs index dd393a28f..253550bb9 100644 --- a/src/atlas_search.rs +++ b/src/atlas_search.rs @@ -38,11 +38,11 @@ pub struct AtlasSearch { _t: PhantomData, } -impl Into for AtlasSearch { - fn into(self) -> Document { +impl From> for Document { + fn from(value: AtlasSearch) -> Self { doc! { "$search": { - self.name: self.stage + value.name: value.stage } } } diff --git a/src/atlas_search/gen.rs b/src/atlas_search/gen.rs index 39cee31ba..f1eff3490 100644 --- a/src/atlas_search/gen.rs +++ b/src/atlas_search/gen.rs @@ -14,7 +14,7 @@ impl AtlasSearch { stage: doc! { "path" : path.to_bson(), "query" : query.to_bson(), }, - _t: PhantomData::default(), + _t: PhantomData, } } #[allow(missing_docs)] @@ -45,7 +45,7 @@ impl AtlasSearch { stage: doc! { "path" : path.to_bson(), "query" : query.to_bson(), }, - _t: PhantomData::default(), + _t: PhantomData, } } #[allow(missing_docs)] @@ -80,16 +80,14 @@ impl AtlasSearch { AtlasSearch { name: "compound", stage: doc! {}, - _t: PhantomData::default(), + _t: PhantomData, } } #[allow(missing_docs)] pub fn must(mut self, must: impl IntoIterator>) -> Self { self.stage.insert( "must", - must.into_iter() - .map(|s| s.into()) - .collect::>(), + must.into_iter().map(Document::from).collect::>(), ); self } @@ -97,10 +95,7 @@ impl AtlasSearch { pub fn must_not(mut self, must_not: impl IntoIterator>) -> Self { self.stage.insert( "mustNot", - must_not - .into_iter() - .map(|s| s.into()) - .collect::>(), + must_not.into_iter().map(Document::from).collect::>(), ); self } @@ -108,10 +103,7 @@ impl AtlasSearch { pub fn should(mut self, should: impl IntoIterator>) -> Self { self.stage.insert( "should", - should - .into_iter() - .map(|s| s.into()) - .collect::>(), + should.into_iter().map(Document::from).collect::>(), ); self } @@ -119,10 +111,7 @@ impl AtlasSearch { pub fn filter(mut self, filter: impl IntoIterator>) -> Self { self.stage.insert( "filter", - filter - .into_iter() - .map(|s| s.into()) - .collect::>(), + filter.into_iter().map(Document::from).collect::>(), ); self } From 6f392fcbc746d715c5ffa00d65a0efae852c207b Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 15 Aug 2025 15:27:45 +0100 Subject: [PATCH 12/15] rename build to unit --- src/atlas_search.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/atlas_search.rs b/src/atlas_search.rs index 253550bb9..8e0be7aa7 100644 --- a/src/atlas_search.rs +++ b/src/atlas_search.rs @@ -48,17 +48,14 @@ impl From> for Document { } } -#[allow(missing_docs)] -pub struct Built; - impl AtlasSearch { - /// Finalize this builder. Not typically needed, but can be useful to include builders of + /// Erase the type this builder. Not typically needed, but can be useful to include builders of /// different types in a single `Vec`. - pub fn build(self) -> AtlasSearch { + pub fn unit(self) -> AtlasSearch<()> { AtlasSearch { name: self.name, stage: self.stage, - _t: PhantomData::default(), + _t: PhantomData, } } From c7e1cc7e5442d3761b9a070e7c6fa61bb308ab84 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 15 Aug 2025 15:28:02 +0100 Subject: [PATCH 13/15] typo --- src/atlas_search.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/atlas_search.rs b/src/atlas_search.rs index 8e0be7aa7..c910da4a8 100644 --- a/src/atlas_search.rs +++ b/src/atlas_search.rs @@ -49,8 +49,8 @@ impl From> for Document { } impl AtlasSearch { - /// Erase the type this builder. Not typically needed, but can be useful to include builders of - /// different types in a single `Vec`. + /// Erase the type of this builder. Not typically needed, but can be useful to include builders + /// of different types in a single `Vec`. pub fn unit(self) -> AtlasSearch<()> { AtlasSearch { name: self.name, From 7c54124dcf56787d01f82ccad032abef2b10b6ba Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Mon, 18 Aug 2025 12:14:04 +0100 Subject: [PATCH 14/15] readability --- etc/gen_atlas_search/src/main.rs | 145 +++++++++++++++---------------- src/atlas_search.rs | 2 + 2 files changed, 74 insertions(+), 73 deletions(-) diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs index e9b06dd3d..df75950a0 100644 --- a/etc/gen_atlas_search/src/main.rs +++ b/etc/gen_atlas_search/src/main.rs @@ -20,13 +20,6 @@ struct Operator { tests: Vec, } -impl Operator { - fn clear_tests(mut self) -> Self { - self.tests.clear(); - self - } -} - #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] enum OperatorType { @@ -39,6 +32,63 @@ enum EncodeType { Object, } +impl Operator { + fn clear_tests(mut self) -> Self { + self.tests.clear(); + self + } + + fn gen_helper(&self) -> TokenStream { + let name_text = &self.name; + let name_ident = format_ident!("{}", name_text.to_case(Case::Pascal)); + let constr_ident = format_ident!("{}", name_text.to_case(Case::Snake)); + + let mut required_args = TokenStream::new(); + let mut init_doc = TokenStream::new(); + let mut setters = TokenStream::new(); + + for arg in &self.arguments { + let ident = format_ident!("{}", arg.name.to_case(Case::Snake)); + let rust_type = arg.rust_type(); + let type_ = rust_type.tokens(); + let arg_name = &arg.name; + let init_expr = rust_type.bson_expr(&ident); + + if arg.optional.unwrap_or(false) { + let tvars = rust_type.variables(); + setters.push(parse_quote! { + #[allow(missing_docs)] + pub fn #ident<#tvars>(mut self, #ident: #type_) -> Self { + self.stage.insert(#arg_name, #init_expr); + self + } + }); + } else { + required_args.push(parse_quote! { #ident : #type_, }); + init_doc.push(parse_quote! { #arg_name : #init_expr, }); + } + } + + let desc = &self.description; + parse_quote! { + #[allow(missing_docs)] + pub struct #name_ident; + + impl AtlasSearch<#name_ident> { + #[doc = #desc] + pub fn #constr_ident(#required_args) -> Self { + AtlasSearch { + name: #name_text, + stage: doc! { #init_doc }, + _t: PhantomData, + } + } + #setters + } + } + } +} + #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] struct Argument { @@ -61,16 +111,6 @@ enum ArgumentType { Int, } -enum ArgumentRustType { - String, - Document, - StringOrArray, - TokenOrder, - MatchCriteria, - Operator, - I32, -} - static QUERY: &str = "query"; static TOKEN_ORDER: &str = "tokenOrder"; static MATCH_CRITERIA: &str = "matchCriteria"; @@ -98,6 +138,16 @@ impl Argument { } } +enum ArgumentRustType { + String, + Document, + StringOrArray, + TokenOrder, + MatchCriteria, + Operator, + I32, +} + impl ArgumentRustType { fn tokens(&self) -> syn::Type { match self { @@ -143,61 +193,6 @@ impl TokenStreamExt for TokenStream { } } -fn gen_from_yaml(p: impl AsRef) -> TokenStream { - let contents = std::fs::read_to_string(p).unwrap(); - let parsed = serde_yaml::from_str::(&contents) - .unwrap() - .clear_tests(); - - let name_text = parsed.name; - let name_ident = format_ident!("{}", name_text.to_case(Case::Pascal)); - let constr_ident = format_ident!("{}", name_text.to_case(Case::Snake)); - - let mut required_args = TokenStream::new(); - let mut init_doc = TokenStream::new(); - let mut setters = TokenStream::new(); - - for arg in parsed.arguments { - let ident = format_ident!("{}", arg.name.to_case(Case::Snake)); - let rust_type = arg.rust_type(); - let type_ = rust_type.tokens(); - let arg_name = &arg.name; - let init_expr = rust_type.bson_expr(&ident); - - if arg.optional.unwrap_or(false) { - let tvars = rust_type.variables(); - setters.push(parse_quote! { - #[allow(missing_docs)] - pub fn #ident<#tvars>(mut self, #ident: #type_) -> Self { - self.stage.insert(#arg_name, #init_expr); - self - } - }); - } else { - required_args.push(parse_quote! { #ident : #type_, }); - init_doc.push(parse_quote! { #arg_name : #init_expr, }); - } - } - - let desc = parsed.description; - parse_quote! { - #[allow(missing_docs)] - pub struct #name_ident; - - impl AtlasSearch<#name_ident> { - #[doc = #desc] - pub fn #constr_ident(#required_args) -> Self { - AtlasSearch { - name: #name_text, - stage: doc! { #init_doc }, - _t: PhantomData, - } - } - #setters - } - } -} - fn main() { let mut operators = TokenStream::new(); for path in [ @@ -205,7 +200,11 @@ fn main() { "yaml/search/text.yaml", "yaml/search/compound.yaml", ] { - operators.push(gen_from_yaml(path)); + let contents = std::fs::read_to_string(path).unwrap(); + let parsed = serde_yaml::from_str::(&contents) + .unwrap() + .clear_tests(); + operators.push(parsed.gen_helper()); } let file = parse_quote! { diff --git a/src/atlas_search.rs b/src/atlas_search.rs index c910da4a8..7a3ac865a 100644 --- a/src/atlas_search.rs +++ b/src/atlas_search.rs @@ -83,6 +83,7 @@ impl IntoIterator for AtlasSearch { /// Order in which to search for tokens. #[derive(Debug, Clone, PartialEq)] +#[non_exhaustive] pub enum TokenOrder { /// Indicates tokens in the query can appear in any order in the documents. Any, @@ -105,6 +106,7 @@ impl TokenOrder { /// Criteria to use to match the terms in the query. #[derive(Debug, Clone, PartialEq)] +#[non_exhaustive] pub enum MatchCriteria { /// Return documents that contain any of the terms from the query field. Any, From ad7c09ebd2af12f13b23c15ac25f670397399690 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 22 Aug 2025 10:09:18 +0100 Subject: [PATCH 15/15] doc improvements --- etc/gen_atlas_search/src/main.rs | 7 ++++++- src/atlas_search.rs | 18 +++++++++++++++++- src/atlas_search/gen.rs | 6 ++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs index df75950a0..4a1346918 100644 --- a/etc/gen_atlas_search/src/main.rs +++ b/etc/gen_atlas_search/src/main.rs @@ -8,7 +8,6 @@ use syn::parse_quote; #[serde(rename_all = "camelCase", deny_unknown_fields)] struct Operator { name: String, - #[expect(dead_code)] link: String, #[serde(rename = "type")] #[expect(dead_code)] @@ -70,12 +69,18 @@ impl Operator { } let desc = &self.description; + let link = format!( + "For more details, see the [{name_text} operator reference]({}).", + self.link + ); parse_quote! { #[allow(missing_docs)] pub struct #name_ident; impl AtlasSearch<#name_ident> { #[doc = #desc] + #[doc = ""] + #[doc = #link] pub fn #constr_ident(#required_args) -> Self { AtlasSearch { name: #name_text, diff --git a/src/atlas_search.rs b/src/atlas_search.rs index 7a3ac865a..c82645190 100644 --- a/src/atlas_search.rs +++ b/src/atlas_search.rs @@ -50,7 +50,23 @@ impl From> for Document { impl AtlasSearch { /// Erase the type of this builder. Not typically needed, but can be useful to include builders - /// of different types in a single `Vec`. + /// of different types in a single `Vec`: + /// ```no_run + /// # async fn wrapper() -> mongodb::error::Result<()> { + /// # use mongodb::{Collection, atlas_search::AtlasSearch, bson::{Document, doc}}; + /// # let collection: Collection = todo!(); + /// let cursor = collection.aggregate(vec![ + /// AtlasSearch::compound() + /// .must(vec![ + /// AtlasSearch::text("description", "varieties").unit(), + /// AtlasSearch::compound() + /// .should(AtlasSearch::text("description", "Fuji")) + /// .unit(), + /// ]) + /// .into(), + /// ]).await?; + /// # } + /// ``` pub fn unit(self) -> AtlasSearch<()> { AtlasSearch { name: self.name, diff --git a/src/atlas_search/gen.rs b/src/atlas_search/gen.rs index f1eff3490..31111c43b 100644 --- a/src/atlas_search/gen.rs +++ b/src/atlas_search/gen.rs @@ -8,6 +8,8 @@ impl AtlasSearch { fields that you intend to query with the autocomplete operator must be indexed with the autocomplete data type in the collection's index definition. */ + /// + ///For more details, see the [autocomplete operator reference](https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/). pub fn autocomplete(path: impl StringOrArray, query: impl StringOrArray) -> Self { AtlasSearch { name: "autocomplete", @@ -39,6 +41,8 @@ impl AtlasSearch { /**The text operator performs a full-text search using the analyzer that you specify in the index configuration. If you omit an analyzer, the text operator uses the default standard analyzer. */ + /// + ///For more details, see the [text operator reference](https://www.mongodb.com/docs/atlas/atlas-search/text/). pub fn text(path: impl StringOrArray, query: impl StringOrArray) -> Self { AtlasSearch { name: "text", @@ -76,6 +80,8 @@ impl AtlasSearch { Each element of a compound query is called a clause, and each clause consists of one or more sub-queries. */ + /// + ///For more details, see the [compound operator reference](https://www.mongodb.com/docs/atlas/atlas-search/compound/). pub fn compound() -> Self { AtlasSearch { name: "compound",