From e7c56a556cb508022423753ca4ada0a46de93c75 Mon Sep 17 00:00:00 2001 From: Andrew Baxter <> Date: Sun, 20 Oct 2024 16:28:14 +0900 Subject: [PATCH 1/2] Expose AST --- Cargo.lock | 7 + Cargo.toml | 3 +- cozo-core-examples/Cargo.toml | 9 ++ cozo-core-examples/src/bin/run.rs | 10 ++ cozo-core-examples/src/bin/run_ast.rs | 58 +++++++ cozo-core-examples/src/bin/run_parse_ast.rs | 14 ++ cozo-core/src/data/aggr.rs | 14 +- cozo-core/src/data/functions.rs | 2 +- cozo-core/src/data/mod.rs | 6 +- cozo-core/src/data/program.rs | 169 ++++++++++---------- cozo-core/src/data/symb.rs | 4 +- cozo-core/src/fts/mod.rs | 7 +- cozo-core/src/lib.rs | 73 ++++++--- cozo-core/src/parse/mod.rs | 115 ++++++++----- cozo-core/src/parse/sys.rs | 65 ++++---- cozo-core/src/runtime/db.rs | 57 +++---- cozo-core/src/runtime/relation.rs | 2 +- 17 files changed, 395 insertions(+), 220 deletions(-) create mode 100644 cozo-core-examples/Cargo.toml create mode 100644 cozo-core-examples/src/bin/run.rs create mode 100644 cozo-core-examples/src/bin/run_ast.rs create mode 100644 cozo-core-examples/src/bin/run_parse_ast.rs diff --git a/Cargo.lock b/Cargo.lock index 3cd142b2..d51b219d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -794,6 +794,13 @@ dependencies = [ "tower-http", ] +[[package]] +name = "cozo-core-examples" +version = "0.1.0" +dependencies = [ + "cozo", +] + [[package]] name = "cozo-lib-wasm" version = "0.7.6" diff --git a/Cargo.toml b/Cargo.toml index 35282861..0e38454d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,8 @@ members = [ "cozo-lib-wasm", "cozo-lib-swift", "cozo-lib-python", - "cozo-lib-nodejs" + "cozo-lib-nodejs", + "cozo-core-examples", ] [profile.bench] diff --git a/cozo-core-examples/Cargo.toml b/cozo-core-examples/Cargo.toml new file mode 100644 index 00000000..70f0e0f6 --- /dev/null +++ b/cozo-core-examples/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "cozo-core-examples" +version = "0.1.0" +edition = "2021" + +[dependencies] +cozo = { version = "0.7.6", path = "../cozo-core", default-features = false, features = [ + "rayon", +] } diff --git a/cozo-core-examples/src/bin/run.rs b/cozo-core-examples/src/bin/run.rs new file mode 100644 index 00000000..c19ab3c4 --- /dev/null +++ b/cozo-core-examples/src/bin/run.rs @@ -0,0 +1,10 @@ +use cozo::{DbInstance, ScriptMutability}; + +fn main() { + let db = DbInstance::new("mem", "", Default::default()).unwrap(); + let script = "?[a] := a in [1, 2, 3]"; + let result = db + .run_script(script, Default::default(), ScriptMutability::Immutable) + .unwrap(); + println!("{:?}", result); +} diff --git a/cozo-core-examples/src/bin/run_ast.rs b/cozo-core-examples/src/bin/run_ast.rs new file mode 100644 index 00000000..85706c39 --- /dev/null +++ b/cozo-core-examples/src/bin/run_ast.rs @@ -0,0 +1,58 @@ +use std::collections::BTreeMap; + +use cozo::{ + data::{ + functions::current_validity, + program::{InputAtom, InputInlineRule, InputInlineRulesOrFixed, InputProgram, Unification}, + symb::PROG_ENTRY, + }, + parse::{CozoScript, ImperativeStmt, ImperativeStmtClause}, + DataValue, DbInstance, Num, ScriptMutability, Symbol, +}; + +fn main() { + let db = DbInstance::new("mem", "", Default::default()).unwrap(); + let sym_a = Symbol::new("a", Default::default()); + let script = CozoScript::Imperative(vec![ImperativeStmt::Program { + prog: ImperativeStmtClause { + prog: InputProgram { + prog: { + let mut p = BTreeMap::new(); + p.insert( + Symbol::new(PROG_ENTRY, Default::default()), + InputInlineRulesOrFixed::Rules { + rules: vec![InputInlineRule { + head: vec![sym_a.clone()], + aggr: vec![None], + body: vec![InputAtom::Unification { + inner: Unification { + binding: sym_a, + expr: cozo::Expr::Const { + val: DataValue::List(vec![ + DataValue::Num(Num::Int(1)), + DataValue::Num(Num::Int(2)), + DataValue::Num(Num::Int(3)), + ]), + span: Default::default(), + }, + one_many_unif: true, + span: Default::default(), + }, + }], + span: Default::default(), + }], + }, + ); + p + }, + out_opts: Default::default(), + disable_magic_rewrite: false, + }, + store_as: None, + }, + }]); + let result = db + .run_script_ast(script, current_validity(), ScriptMutability::Immutable) + .unwrap(); + println!("{:?}", result); +} diff --git a/cozo-core-examples/src/bin/run_parse_ast.rs b/cozo-core-examples/src/bin/run_parse_ast.rs new file mode 100644 index 00000000..2714f5d9 --- /dev/null +++ b/cozo-core-examples/src/bin/run_parse_ast.rs @@ -0,0 +1,14 @@ +use cozo::{data::functions::current_validity, parse::parse_script, DbInstance, ScriptMutability}; + +fn main() { + let db = DbInstance::new("mem", "", Default::default()).unwrap(); + let script = "?[a] := a in [1, 2, 3]"; + let cur_vld = current_validity(); + let script_ast = + parse_script(script, &Default::default(), &db.get_fixed_rules(), cur_vld).unwrap(); + println!("AST: {:?}", script_ast); + let result = db + .run_script_ast(script_ast, cur_vld, ScriptMutability::Immutable) + .unwrap(); + println!("Result: {:?}", result); +} diff --git a/cozo-core/src/data/aggr.rs b/cozo-core/src/data/aggr.rs index 86652e0c..a555dd19 100644 --- a/cozo-core/src/data/aggr.rs +++ b/cozo-core/src/data/aggr.rs @@ -14,11 +14,11 @@ use rand::prelude::*; use crate::data::value::DataValue; -pub(crate) struct Aggregation { - pub(crate) name: &'static str, - pub(crate) is_meet: bool, - pub(crate) meet_op: Option>, - pub(crate) normal_op: Option>, +pub struct Aggregation { + pub name: &'static str, + pub is_meet: bool, + pub meet_op: Option>, + pub normal_op: Option>, } impl Clone for Aggregation { @@ -32,12 +32,12 @@ impl Clone for Aggregation { } } -pub(crate) trait NormalAggrObj: Send + Sync { +pub trait NormalAggrObj: Send + Sync { fn set(&mut self, value: &DataValue) -> Result<()>; fn get(&self) -> Result; } -pub(crate) trait MeetAggrObj: Send + Sync { +pub trait MeetAggrObj: Send + Sync { fn init_val(&self) -> DataValue; fn update(&self, left: &mut DataValue, right: &DataValue) -> Result; } diff --git a/cozo-core/src/data/functions.rs b/cozo-core/src/data/functions.rs index 1e804f00..d3d40673 100644 --- a/cozo-core/src/data/functions.rs +++ b/cozo-core/src/data/functions.rs @@ -2453,7 +2453,7 @@ pub(crate) fn op_now(_args: &[DataValue]) -> Result { )) } -pub(crate) fn current_validity() -> ValidityTs { +pub fn current_validity() -> ValidityTs { #[cfg(not(target_arch = "wasm32"))] let ts_micros = { let now = SystemTime::now(); diff --git a/cozo-core/src/data/mod.rs b/cozo-core/src/data/mod.rs index 959a68c6..dea0cccd 100644 --- a/cozo-core/src/data/mod.rs +++ b/cozo-core/src/data/mod.rs @@ -8,12 +8,12 @@ pub(crate) mod aggr; pub(crate) mod expr; -pub(crate) mod functions; +pub mod functions; pub(crate) mod json; pub(crate) mod memcmp; -pub(crate) mod program; +pub mod program; pub(crate) mod relation; -pub(crate) mod symb; +pub mod symb; pub(crate) mod tuple; pub(crate) mod value; diff --git a/cozo-core/src/data/program.rs b/cozo-core/src/data/program.rs index d3c6b0af..cf7e8ee2 100644 --- a/cozo-core/src/data/program.rs +++ b/cozo-core/src/data/program.rs @@ -47,14 +47,16 @@ pub(crate) enum ReturnMutation { } #[derive(Clone, PartialEq, Default)] -pub(crate) struct QueryOutOptions { - pub(crate) limit: Option, - pub(crate) offset: Option, - pub(crate) timeout: Option, - pub(crate) sleep: Option, - pub(crate) sorters: Vec<(Symbol, SortDir)>, - pub(crate) store_relation: Option<(InputRelationHandle, RelationOp, ReturnMutation)>, - pub(crate) assertion: Option, +pub struct QueryOutOptions { + pub limit: Option, + pub offset: Option, + /// Terminate query with an error if it exceeds this many seconds. + pub timeout: Option, + /// Sleep after performing the query for this number of seconds. Ignored in WASM. + pub sleep: Option, + pub sorters: Vec<(Symbol, SortDir)>, + pub store_relation: Option<(InputRelationHandle, RelationOp, ReturnMutation)>, + pub assertion: Option, } impl Debug for QueryOutOptions { @@ -82,16 +84,16 @@ impl Display for QueryOutOptions { writeln!(f, "{symb};")?; } if let Some(( - InputRelationHandle { - name, - metadata: StoredRelationMetadata { keys, non_keys }, - key_bindings, - dep_bindings, - .. - }, - op, - return_mutation, - )) = &self.store_relation + InputRelationHandle { + name, + metadata: StoredRelationMetadata { keys, non_keys }, + key_bindings, + dep_bindings, + .. + }, + op, + return_mutation, + )) = &self.store_relation { if *return_mutation == ReturnMutation::Returning { writeln!(f, ":returning")?; @@ -184,13 +186,13 @@ impl QueryOutOptions { } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub(crate) enum SortDir { +pub enum SortDir { Asc, Dsc, } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub(crate) enum RelationOp { +pub enum RelationOp { Create, Replace, Put, @@ -219,7 +221,7 @@ impl TempSymbGen { } #[derive(Debug, Clone)] -pub(crate) enum InputInlineRulesOrFixed { +pub enum InputInlineRulesOrFixed { Rules { rules: Vec }, Fixed { fixed: FixedRuleApply }, } @@ -249,14 +251,14 @@ impl InputInlineRulesOrFixed { } #[derive(Clone)] -pub(crate) struct FixedRuleApply { - pub(crate) fixed_handle: FixedRuleHandle, - pub(crate) rule_args: Vec, - pub(crate) options: Arc, Expr>>, - pub(crate) head: Vec, - pub(crate) arity: usize, - pub(crate) span: SourceSpan, - pub(crate) fixed_impl: Arc>, +pub struct FixedRuleApply { + pub fixed_handle: FixedRuleHandle, + pub rule_args: Vec, + pub options: Arc, Expr>>, + pub head: Vec, + pub arity: usize, + pub span: SourceSpan, + pub fixed_impl: Arc>, } impl FixedRuleApply { @@ -367,7 +369,7 @@ impl Debug for MagicFixedRuleApply { } #[derive(Clone)] -pub(crate) enum FixedRuleArg { +pub enum FixedRuleArg { InMem { name: Symbol, bindings: Vec, @@ -460,11 +462,15 @@ impl MagicFixedRuleRuleArg { } } +/// This is a single query, as you'd find between `{}` in a chained query script or with no `{}` in a single query script. #[derive(Debug, Clone)] -pub(crate) struct InputProgram { - pub(crate) prog: BTreeMap, - pub(crate) out_opts: QueryOutOptions, - pub(crate) disable_magic_rewrite: bool, +pub struct InputProgram { + /// A mapping of names to rules. The entry rule must be named `?`. + /// + /// Ex: `?` in `?[a, b] := ...` + pub prog: BTreeMap, + pub out_opts: QueryOutOptions, + pub disable_magic_rewrite: bool, } impl Display for InputProgram { @@ -504,13 +510,13 @@ impl Display for InputProgram { } InputInlineRulesOrFixed::Fixed { fixed: - FixedRuleApply { - fixed_handle: handle, - rule_args, - options, - head, - .. - }, + FixedRuleApply { + fixed_handle: handle, + rule_args, + options, + head, + .. + }, } => { write!(f, "{name}")?; f.debug_list().entries(head).finish()?; @@ -645,7 +651,7 @@ impl InputProgram { inner: rule.body, span: rule.span, } - .disjunctive_normal_form(tx)?; + .disjunctive_normal_form(tx)?; let mut new_head = Vec::with_capacity(rule.head.len()); let mut seen: BTreeMap<&Symbol, Vec> = BTreeMap::default(); for symb in rule.head.iter() { @@ -879,11 +885,11 @@ impl MagicSymbol { } #[derive(Debug, Clone)] -pub(crate) struct InputInlineRule { - pub(crate) head: Vec, - pub(crate) aggr: Vec)>>, - pub(crate) body: Vec, - pub(crate) span: SourceSpan, +pub struct InputInlineRule { + pub head: Vec, + pub aggr: Vec)>>, + pub body: Vec, + pub span: SourceSpan, } #[derive(Debug)] @@ -923,7 +929,7 @@ impl MagicInlineRule { } #[derive(Clone)] -pub(crate) enum InputAtom { +pub enum InputAtom { Rule { inner: InputRuleApplyAtom, }, @@ -948,6 +954,7 @@ pub(crate) enum InputAtom { inner: Vec, span: SourceSpan, }, + /// `x = y` or `x in y` Unification { inner: Unification, }, @@ -957,12 +964,12 @@ pub(crate) enum InputAtom { } #[derive(Clone)] -pub(crate) struct SearchInput { - pub(crate) relation: Symbol, - pub(crate) index: Symbol, - pub(crate) bindings: BTreeMap, Expr>, - pub(crate) parameters: BTreeMap, Expr>, - pub(crate) span: SourceSpan, +pub struct SearchInput { + pub relation: Symbol, + pub index: Symbol, + pub bindings: BTreeMap, Expr>, + pub parameters: BTreeMap, Expr>, + pub span: SourceSpan, } #[derive(Clone, Debug)] @@ -1007,7 +1014,7 @@ pub(crate) struct FtsSearch { } impl HnswSearch { - pub(crate) fn all_bindings(&self) -> impl Iterator { + pub(crate) fn all_bindings(&self) -> impl Iterator { self.bindings .iter() .chain(self.bind_field.iter()) @@ -1018,7 +1025,7 @@ impl HnswSearch { } impl FtsSearch { - pub(crate) fn all_bindings(&self) -> impl Iterator { + pub(crate) fn all_bindings(&self) -> impl Iterator { self.bindings.iter().chain(self.bind_score.iter()) } } @@ -1670,12 +1677,12 @@ impl Display for InputAtom { } InputAtom::Unification { inner: - Unification { - binding, - expr, - one_many_unif, - .. - }, + Unification { + binding, + expr, + one_many_unif, + .. + }, } => { write!(f, "{binding}")?; if *one_many_unif { @@ -1743,26 +1750,26 @@ pub(crate) enum MagicAtom { } #[derive(Clone, Debug)] -pub(crate) struct InputRuleApplyAtom { - pub(crate) name: Symbol, - pub(crate) args: Vec, - pub(crate) span: SourceSpan, +pub struct InputRuleApplyAtom { + pub name: Symbol, + pub args: Vec, + pub span: SourceSpan, } #[derive(Clone, Debug)] -pub(crate) struct InputNamedFieldRelationApplyAtom { - pub(crate) name: Symbol, - pub(crate) args: BTreeMap, Expr>, - pub(crate) valid_at: Option, - pub(crate) span: SourceSpan, +pub struct InputNamedFieldRelationApplyAtom { + pub name: Symbol, + pub args: BTreeMap, Expr>, + pub valid_at: Option, + pub span: SourceSpan, } #[derive(Clone, Debug)] -pub(crate) struct InputRelationApplyAtom { - pub(crate) name: Symbol, - pub(crate) args: Vec, - pub(crate) valid_at: Option, - pub(crate) span: SourceSpan, +pub struct InputRelationApplyAtom { + pub name: Symbol, + pub args: Vec, + pub valid_at: Option, + pub span: SourceSpan, } #[derive(Clone, Debug)] @@ -1796,11 +1803,11 @@ pub(crate) struct MagicRelationApplyAtom { } #[derive(Clone, Debug)] -pub(crate) struct Unification { - pub(crate) binding: Symbol, - pub(crate) expr: Expr, - pub(crate) one_many_unif: bool, - pub(crate) span: SourceSpan, +pub struct Unification { + pub binding: Symbol, + pub expr: Expr, + pub one_many_unif: bool, + pub span: SourceSpan, } impl Unification { diff --git a/cozo-core/src/data/symb.rs b/cozo-core/src/data/symb.rs index 531d9619..34775928 100644 --- a/cozo-core/src/data/symb.rs +++ b/cozo-core/src/data/symb.rs @@ -73,7 +73,7 @@ impl Debug for Symbol { } impl Symbol { - pub(crate) fn new(name: impl Into>, span: SourceSpan) -> Self { + pub fn new(name: impl Into>, span: SourceSpan) -> Self { Self { name: name.into(), span, @@ -104,4 +104,4 @@ impl Symbol { } } -pub(crate) const PROG_ENTRY: &str = "?"; +pub const PROG_ENTRY: &str = "?"; diff --git a/cozo-core/src/fts/mod.rs b/cozo-core/src/fts/mod.rs index 046bbadb..74a4a96f 100644 --- a/cozo-core/src/fts/mod.rs +++ b/cozo-core/src/fts/mod.rs @@ -36,10 +36,11 @@ pub(crate) struct FtsIndexManifest { pub(crate) filters: Vec, } +#[allow(missing_docs)] #[derive(Debug, Clone, PartialEq, Eq, Hash, serde_derive::Serialize, serde_derive::Deserialize)] -pub(crate) struct TokenizerConfig { - pub(crate) name: SmartString, - pub(crate) args: Vec, +pub struct TokenizerConfig { + pub name: SmartString, + pub args: Vec, } impl TokenizerConfig { diff --git a/cozo-core/src/lib.rs b/cozo-core/src/lib.rs index 84a6297c..a3a5525a 100644 --- a/cozo-core/src/lib.rs +++ b/cozo-core/src/lib.rs @@ -33,10 +33,12 @@ use std::collections::BTreeMap; use std::path::Path; +use std::sync::Arc; #[allow(unused_imports)] use std::time::Instant; use crossbeam::channel::{bounded, Receiver, Sender}; +use data::functions::current_validity; use lazy_static::lazy_static; pub use miette::Error; use miette::Report; @@ -45,6 +47,8 @@ use miette::{ bail, miette, GraphicalReportHandler, GraphicalTheme, IntoDiagnostic, JSONReportHandler, Result, ThemeCharacters, ThemeStyles, }; +use parse::parse_script; +use parse::CozoScript; use serde_json::json; pub use data::value::{DataValue, Num, RegexWrapper, UuidWrapper, Validity, ValidityTs}; @@ -73,15 +77,15 @@ pub use crate::parse::SourceSpan; pub use crate::runtime::callback::CallbackOp; pub use crate::runtime::db::evaluate_expressions; pub use crate::runtime::db::get_variables; +pub use crate::runtime::db::Payload; pub use crate::runtime::db::Poison; pub use crate::runtime::db::ScriptMutability; -pub use crate::runtime::db::Payload; pub use crate::runtime::db::TransactionPayload; -pub(crate) mod data; +pub mod data; pub(crate) mod fixed_rule; pub(crate) mod fts; -pub(crate) mod parse; +pub mod parse; pub(crate) mod query; pub(crate) mod runtime; pub(crate) mod storage; @@ -170,29 +174,58 @@ impl DbInstance { ) -> std::result::Result { Self::new(engine, path, options).map_err(|err| err.to_string()) } + + /// Dispatcher method. See [crate::Db::get_fixed_rules]. + pub fn get_fixed_rules(&self) -> BTreeMap>> { + match self { + DbInstance::Mem(db) => db.get_fixed_rules(), + #[cfg(feature = "storage-sqlite")] + DbInstance::Sqlite(db) => db.get_fixed_rules(), + #[cfg(feature = "storage-rocksdb")] + DbInstance::RocksDb(db) => db.get_fixed_rules(), + #[cfg(feature = "storage-sled")] + DbInstance::Sled(db) => db.get_fixed_rules(), + #[cfg(feature = "storage-tikv")] + DbInstance::TiKv(db) => db.get_fixed_rules(), + } + } /// Dispatcher method. See [crate::Db::run_script]. pub fn run_script( &self, payload: &str, params: BTreeMap, mutability: ScriptMutability, + ) -> Result { + let cur_vld = current_validity(); + self.run_script_ast( + parse_script(payload, ¶ms, &self.get_fixed_rules(), cur_vld)?, + cur_vld, + mutability, + ) + } + /// `run_script` with mutable script and no parameters + pub fn run_default(&self, payload: &str) -> Result { + self.run_script(payload, BTreeMap::new(), ScriptMutability::Mutable) + } + /// Run a parsed (AST) program. If you have a string script, use `run_script` or `run_default`. + pub fn run_script_ast( + &self, + payload: CozoScript, + cur_vld: ValidityTs, + mutability: ScriptMutability, ) -> Result { match self { - DbInstance::Mem(db) => db.run_script(payload, params, mutability), + DbInstance::Mem(db) => db.run_script_ast(payload, cur_vld, mutability), #[cfg(feature = "storage-sqlite")] - DbInstance::Sqlite(db) => db.run_script(payload, params, mutability), + DbInstance::Sqlite(db) => db.run_script_ast(payload, cur_vld, mutability), #[cfg(feature = "storage-rocksdb")] - DbInstance::RocksDb(db) => db.run_script(payload, params, mutability), + DbInstance::RocksDb(db) => db.run_script_ast(payload, cur_vld, mutability), #[cfg(feature = "storage-sled")] - DbInstance::Sled(db) => db.run_script(payload, params, mutability), + DbInstance::Sled(db) => db.run_script_ast(payload, cur_vld, mutability), #[cfg(feature = "storage-tikv")] - DbInstance::TiKv(db) => db.run_script(payload, params, mutability), + DbInstance::TiKv(db) => db.run_script_ast(payload, cur_vld, mutability), } } - /// `run_script` with mutable script and no parameters - pub fn run_default(&self, payload: &str) -> Result { - self.run_script(payload, BTreeMap::new(), ScriptMutability::Mutable) - } /// Run the CozoScript passed in. The `params` argument is a map of parameters. /// Fold any error into the return JSON itself. /// See [crate::Db::run_script]. @@ -203,13 +236,13 @@ impl DbInstance { mutability: ScriptMutability, ) -> JsonValue { #[cfg(not(target_arch = "wasm32"))] - let start = Instant::now(); + let start = Instant::now(); match self.run_script(payload, params, mutability) { Ok(named_rows) => { let mut j_val = named_rows.into_json(); #[cfg(not(target_arch = "wasm32"))] - let took = start.elapsed().as_secs_f64(); + let took = start.elapsed().as_secs_f64(); let map = j_val.as_object_mut().unwrap(); map.insert("ok".to_string(), json!(true)); #[cfg(not(target_arch = "wasm32"))] @@ -246,13 +279,13 @@ impl DbInstance { ScriptMutability::Mutable }, ) - .to_string() + .to_string() } /// Dispatcher method. See [crate::Db::export_relations]. pub fn export_relations(&self, relations: I) -> Result> - where - T: AsRef, - I: Iterator, + where + T: AsRef, + I: Iterator, { match self { DbInstance::Mem(db) => db.export_relations(relations), @@ -451,8 +484,8 @@ impl DbInstance { } /// Dispatcher method. See [crate::Db::register_fixed_rule]. pub fn register_fixed_rule(&self, name: String, rule_impl: R) -> Result<()> - where - R: FixedRule + 'static, + where + R: FixedRule + 'static, { match self { DbInstance::Mem(db) => db.register_fixed_rule(name, rule_impl), diff --git a/cozo-core/src/parse/mod.rs b/cozo-core/src/parse/mod.rs index 96f4bca7..be3ce2c0 100644 --- a/cozo-core/src/parse/mod.rs +++ b/cozo-core/src/parse/mod.rs @@ -5,6 +5,9 @@ * If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. */ +//! AST for Cozo scripts, for generating Cozo scripts programmatically. +//! +//! NOTE! This is unstable, the AST structure and method signatures may change in any release. Use at your own risk. use std::cmp::{max, min}; use std::collections::{BTreeMap, BTreeSet}; @@ -42,26 +45,34 @@ pub(crate) struct CozoScriptParser; pub(crate) type Pair<'a> = pest::iterators::Pair<'a, Rule>; pub(crate) type Pairs<'a> = pest::iterators::Pairs<'a, Rule>; -pub(crate) enum CozoScript { +/// This represents a full Cozo script, as you'd pass to `run_script`. +#[derive(Debug)] +pub enum CozoScript { + #[allow(missing_docs)] Single(InputProgram), + #[allow(missing_docs)] Imperative(ImperativeProgram), + #[allow(missing_docs)] Sys(SysOp), } +#[allow(missing_docs)] #[derive(Debug)] -pub(crate) struct ImperativeStmtClause { - pub(crate) prog: InputProgram, - pub(crate) store_as: Option>, +pub struct ImperativeStmtClause { + pub prog: InputProgram, + pub store_as: Option>, } +#[allow(missing_docs)] #[derive(Debug)] -pub(crate) struct ImperativeSysop { - pub(crate) sysop: SysOp, - pub(crate) store_as: Option>, +pub struct ImperativeSysop { + pub sysop: SysOp, + pub store_as: Option>, } +#[allow(missing_docs)] #[derive(Debug)] -pub(crate) enum ImperativeStmt { +pub enum ImperativeStmt { Break { target: Option>, span: SourceSpan, @@ -104,7 +115,9 @@ pub(crate) enum ImperativeStmt { pub(crate) type ImperativeCondition = Either, ImperativeStmtClause>; -pub(crate) type ImperativeProgram = Vec; +/// This is a [chained query](https://docs.cozodb.org/en/latest/stored.html#chaining-queries), +/// a series of `{}` queries possibly with imperative directives like `%if` and `%loop`. +pub type ImperativeProgram = Vec; impl ImperativeStmt { pub(crate) fn needs_write_locks(&self, collector: &mut BTreeSet>) { @@ -148,41 +161,48 @@ impl ImperativeStmt { | ImperativeStmt::Break { .. } | ImperativeStmt::Continue { .. } | ImperativeStmt::TempSwap { .. } => {} - ImperativeStmt::SysOp { sysop } => { - match &sysop.sysop { - SysOp::RemoveRelation(rels) => { - for rel in rels { - collector.insert(rel.name.clone()); - } - } - SysOp::RenameRelation(renames) => { - for (old, new) in renames { - collector.insert(old.name.clone()); - collector.insert(new.name.clone()); - } - } - SysOp::CreateIndex(symb, subs, _) => { - collector.insert(symb.name.clone()); - collector.insert(SmartString::from(format!("{}:{}", symb.name, subs.name))); - } - SysOp::CreateVectorIndex(m) => { - collector.insert(m.base_relation.clone()); - collector.insert(SmartString::from(format!("{}:{}", m.base_relation, m.index_name))); - } - SysOp::CreateFtsIndex(m) => { - collector.insert(m.base_relation.clone()); - collector.insert(SmartString::from(format!("{}:{}", m.base_relation, m.index_name))); - } - SysOp::CreateMinHashLshIndex(m) => { - collector.insert(m.base_relation.clone()); - collector.insert(SmartString::from(format!("{}:{}", m.base_relation, m.index_name))); + ImperativeStmt::SysOp { sysop } => match &sysop.sysop { + SysOp::RemoveRelation(rels) => { + for rel in rels { + collector.insert(rel.name.clone()); } - SysOp::RemoveIndex(rel, idx) => { - collector.insert(SmartString::from(format!("{}:{}", rel.name, idx.name))); + } + SysOp::RenameRelation(renames) => { + for (old, new) in renames { + collector.insert(old.name.clone()); + collector.insert(new.name.clone()); } - _ => {} } - } + SysOp::CreateIndex(symb, subs, _) => { + collector.insert(symb.name.clone()); + collector.insert(SmartString::from(format!("{}:{}", symb.name, subs.name))); + } + SysOp::CreateVectorIndex(m) => { + collector.insert(m.base_relation.clone()); + collector.insert(SmartString::from(format!( + "{}:{}", + m.base_relation, m.index_name + ))); + } + SysOp::CreateFtsIndex(m) => { + collector.insert(m.base_relation.clone()); + collector.insert(SmartString::from(format!( + "{}:{}", + m.base_relation, m.index_name + ))); + } + SysOp::CreateMinHashLshIndex(m) => { + collector.insert(m.base_relation.clone()); + collector.insert(SmartString::from(format!( + "{}:{}", + m.base_relation, m.index_name + ))); + } + SysOp::RemoveIndex(rel, idx) => { + collector.insert(SmartString::from(format!("{}:{}", rel.name, idx.name))); + } + _ => {} + }, } } } @@ -272,7 +292,18 @@ pub(crate) fn parse_expressions( build_expr(parsed.into_inner().next().unwrap(), param_pool) } -pub(crate) fn parse_script( +/// This parses a text script into the AST used by Cozo. +/// +/// Note! This is an unstable interface, the signature may change between releases. Depend on it at your own risk. +/// +/// * `src` - the script to parse +/// +/// * `param_pool` - the list of parameters to execute the script with. These are substituted into the syntax tree during parsing. +/// +/// * `fixed_rules` - a mapping of fixed rule names to their implementations. These are substituted into the syntax tree during parsing. +/// +/// * `cur_vld` - the current timestamp, substituted into expressions where validity is relevant. +pub fn parse_script( src: &str, param_pool: &BTreeMap, fixed_rules: &BTreeMap>>, diff --git a/cozo-core/src/parse/sys.rs b/cozo-core/src/parse/sys.rs index dec5e1b8..25b80a0f 100644 --- a/cozo-core/src/parse/sys.rs +++ b/cozo-core/src/parse/sys.rs @@ -27,7 +27,7 @@ use crate::runtime::relation::AccessLevel; use crate::{Expr, FixedRule}; #[derive(Debug)] -pub(crate) enum SysOp { +pub enum SysOp { Compact, ListColumns(Symbol), ListIndices(Symbol), @@ -46,51 +46,52 @@ pub(crate) enum SysOp { CreateFtsIndex(FtsIndexConfig), CreateMinHashLshIndex(MinHashLshConfig), RemoveIndex(Symbol, Symbol), - DescribeRelation(Symbol, SmartString) + DescribeRelation(Symbol, SmartString), } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct FtsIndexConfig { - pub(crate) base_relation: SmartString, - pub(crate) index_name: SmartString, - pub(crate) extractor: String, - pub(crate) tokenizer: TokenizerConfig, - pub(crate) filters: Vec, +pub struct FtsIndexConfig { + pub base_relation: SmartString, + pub index_name: SmartString, + pub extractor: String, + pub tokenizer: TokenizerConfig, + pub filters: Vec, } +#[allow(missing_docs)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct MinHashLshConfig { - pub(crate) base_relation: SmartString, - pub(crate) index_name: SmartString, - pub(crate) extractor: String, - pub(crate) tokenizer: TokenizerConfig, - pub(crate) filters: Vec, - pub(crate) n_gram: usize, - pub(crate) n_perm: usize, - pub(crate) false_positive_weight: OrderedFloat, - pub(crate) false_negative_weight: OrderedFloat, - pub(crate) target_threshold: OrderedFloat, +pub struct MinHashLshConfig { + pub base_relation: SmartString, + pub index_name: SmartString, + pub extractor: String, + pub tokenizer: TokenizerConfig, + pub filters: Vec, + pub n_gram: usize, + pub n_perm: usize, + pub false_positive_weight: OrderedFloat, + pub false_negative_weight: OrderedFloat, + pub target_threshold: OrderedFloat, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct HnswIndexConfig { - pub(crate) base_relation: SmartString, - pub(crate) index_name: SmartString, - pub(crate) vec_dim: usize, - pub(crate) dtype: VecElementType, - pub(crate) vec_fields: Vec>, - pub(crate) distance: HnswDistance, - pub(crate) ef_construction: usize, - pub(crate) m_neighbours: usize, - pub(crate) index_filter: Option, - pub(crate) extend_candidates: bool, - pub(crate) keep_pruned_connections: bool, +pub struct HnswIndexConfig { + pub base_relation: SmartString, + pub index_name: SmartString, + pub vec_dim: usize, + pub dtype: VecElementType, + pub vec_fields: Vec>, + pub distance: HnswDistance, + pub ef_construction: usize, + pub m_neighbours: usize, + pub index_filter: Option, + pub extend_candidates: bool, + pub keep_pruned_connections: bool, } #[derive( Debug, Clone, Copy, PartialEq, Eq, Hash, serde_derive::Serialize, serde_derive::Deserialize, )] -pub(crate) enum HnswDistance { +pub enum HnswDistance { L2, InnerProduct, Cosine, diff --git a/cozo-core/src/runtime/db.rs b/cozo-core/src/runtime/db.rs index abac709b..fac9ec5a 100644 --- a/cozo-core/src/runtime/db.rs +++ b/cozo-core/src/runtime/db.rs @@ -394,6 +394,11 @@ impl<'s, S: Storage<'s>> Db { } } + /// This returns the set of fixed rule implementations for this specific backend. + pub fn get_fixed_rules(&'s self) -> BTreeMap>> { + return self.fixed_rules.read().unwrap().clone(); + } + /// Run the CozoScript passed in. The `params` argument is a map of parameters. pub fn run_script( &'s self, @@ -401,12 +406,15 @@ impl<'s, S: Storage<'s>> Db { params: BTreeMap, mutability: ScriptMutability, ) -> Result { - let cur_vld = current_validity(); - self.do_run_script( - payload, - ¶ms, - cur_vld, - mutability == ScriptMutability::Immutable, + self.run_script_ast( + parse_script( + payload, + ¶ms, + &self.get_fixed_rules(), + current_validity(), + )?, + current_validity(), + mutability, ) } @@ -416,8 +424,22 @@ impl<'s, S: Storage<'s>> Db { payload: &str, params: BTreeMap, ) -> Result { - let cur_vld = current_validity(); - self.do_run_script(payload, ¶ms, cur_vld, true) + self.run_script(payload, params, ScriptMutability::Immutable) + } + + /// Run the AST CozoScript passed in. + pub fn run_script_ast( + &'s self, + payload: CozoScript, + cur_vld: ValidityTs, + mutability: ScriptMutability, + ) -> Result { + let read_only = mutability == ScriptMutability::Immutable; + match payload { + CozoScript::Single(p) => self.execute_single(cur_vld, p, read_only), + CozoScript::Imperative(ps) => self.execute_imperative(cur_vld, &ps, read_only), + CozoScript::Sys(op) => self.run_sys_op(op, read_only), + } } /// Export relations to JSON data. @@ -889,25 +911,6 @@ impl<'s, S: Storage<'s>> Db { Ok(q_res) } - fn do_run_script( - &'s self, - payload: &str, - param_pool: &BTreeMap, - cur_vld: ValidityTs, - read_only: bool, - ) -> Result { - match parse_script( - payload, - param_pool, - &self.fixed_rules.read().unwrap(), - cur_vld, - )? { - CozoScript::Single(p) => self.execute_single(cur_vld, p, read_only), - CozoScript::Imperative(ps) => self.execute_imperative(cur_vld, &ps, read_only), - CozoScript::Sys(op) => self.run_sys_op(op, read_only), - } - } - fn execute_single( &'s self, cur_vld: ValidityTs, diff --git a/cozo-core/src/runtime/relation.rs b/cozo-core/src/runtime/relation.rs index 932c7aeb..473abc9d 100644 --- a/cozo-core/src/runtime/relation.rs +++ b/cozo-core/src/runtime/relation.rs @@ -119,7 +119,7 @@ impl RelationHandle { Ord, PartialOrd, )] -pub(crate) enum AccessLevel { +pub enum AccessLevel { Hidden, ReadOnly, Protected, From 2b1f12d38cf4513cb57eee02e4acc3b23ed9baad Mon Sep 17 00:00:00 2001 From: Andrew Baxter <> Date: Sun, 20 Oct 2024 16:29:36 +0900 Subject: [PATCH 2/2] More doc --- cozo-core/src/data/program.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cozo-core/src/data/program.rs b/cozo-core/src/data/program.rs index cf7e8ee2..8319deaa 100644 --- a/cozo-core/src/data/program.rs +++ b/cozo-core/src/data/program.rs @@ -1804,8 +1804,10 @@ pub(crate) struct MagicRelationApplyAtom { #[derive(Clone, Debug)] pub struct Unification { + /// Symbol to bind expression to. pub binding: Symbol, pub expr: Expr, + /// If false, `=`, if true, `in`. If true, one row is created for each value in the list in `expr`. pub one_many_unif: bool, pub span: SourceSpan, }