feat: programming_books example (#314)

Please be sure to look over the pull request guidelines here: https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md#submit-pr. # Please go through the following checklist - [ ] The PR title and commit messages adhere to guidelines here: https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md. In particular `!` is used if and only if at least one breaking change has been introduced. - [ ] I have run the ci check script with `source scripts/run_ci_checks.sh`. # Rationale for this change  # What changes are included in this PR?  # Are these changes tested?  Yes: Parsing the query: SELECT COUNT(*) AS total_books FROM books... Done in 5.759542000000001 ms. Generating proof...Done in 258.379792 ms. Verifying proof...Verified in 82.497291 ms. Query Result: Ok(OwnedTable { table: {Identifier { name: "total_books" }: BigInt([10])} }) Parsing the query: SELECT title, author FROM books WHERE rating > 4.5... Done in 8.903875000000001 ms. Generating proof...Done in 350.96833399999997 ms. Verifying proof...Verified in 103.658958 ms. Query Result: Ok(OwnedTable { table: {Identifier { name: "title" }: VarChar(["Clean Code", "The Clean Coder", "Design Patterns", "Effective Java", "Introduction to Algorithms", "Code Complete"]), Identifier { name: "author" }: VarChar(["Robert C. Martin", "Robert C. Martin", "Erich Gamma", "Joshua Bloch", "Thomas H. Cormen", "Steve McConnell"])} }) Parsing the query: SELECT title, publication_year FROM books WHERE genre = 'Programming' AND publication_year > 2000... Done in 5.648333 ms. Generating proof...Done in 257.21125 ms. Verifying proof...Verified in 111.860208 ms. 
Query Result: Ok(OwnedTable { table: {Identifier { name: "title" }: VarChar(["Clean Code", "The Clean Coder", "Effective Java", "Code Complete"]), Identifier { name: "publication_year" }: BigInt([2008, 2011, 2008, 2004])} }) Parsing the query: SELECT author, COUNT(*) AS book_count FROM books GROUP BY author ORDER BY book_count DESC LIMIT 5... Done in 5.722208999999999 ms. Generating proof...Done in 181.42775 ms. Verifying proof...Verified in 79.493166 ms. Query Result: Ok(OwnedTable { table: {Identifier { name: "author" }: VarChar(["Robert C. Martin", "Andrew Hunt", "Erich Gamma", "Fred Brooks", "Joshua Bloch"]), Identifier { name: "book_count" }: BigInt([2, 1, 1, 1, 1])} })
spaceandtimelabs · Oct 29, 2024 · cbc33c2 · cbc33c2
2 parents 3c666a1 + 4d51ff2
commit cbc33c2
Show file tree

Hide file tree

Showing 4 changed files with 151 additions and 1 deletion.
diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml
@@ -122,6 +122,8 @@ jobs:
         run: cargo run --example dinosaurs
       - name: Run books example
         run: cargo run --example books
+      - name: Run programming books example
+        run: cargo run --example programming_books
       - name: Run brands example
         run: cargo run --example brands
       - name: Run avocado-prices example

diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml
@@ -111,6 +111,10 @@ required-features = [ "arrow" ]
 name = "books"
 required-features = [ "arrow" ]
 
+[[example]]
+name = "programming_books"
+required-features = ["arrow"]
+
 [[example]]
 name = "brands"
 required-features = [ "arrow" ]
@@ -148,4 +152,4 @@ required-features = [ "test" ]
 [[bench]]
 name = "jaeger_benches"
 harness = false
-required-features = [ "blitzar" ]
+required-features = [ "blitzar" ]
diff --git a/crates/proof-of-sql/examples/programming_books/main.rs b/crates/proof-of-sql/examples/programming_books/main.rs
@@ -0,0 +1,133 @@
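+//! This is a non-interactive example of using Proof of SQL with a programming books dataset.
+//! To run this, use `cargo run --example programming_books` from the repository root
+//! (the CSV file is opened via a path relative to the current working directory).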
+//!
+//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
+//! you can run `cargo run --example programming_books --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.
+
+use arrow::datatypes::SchemaRef;
+use arrow_csv::{infer_schema_from_files, ReaderBuilder};
+use proof_of_sql::{
+    base::database::{
+        arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
+        TestAccessor,
+    },
+    proof_primitive::dory::{
+        DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters,
+        VerifierSetup,
+    },
+    sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof},
+};
+use rand::{rngs::StdRng, SeedableRng};
+use std::{fs::File, time::Instant};
+
+const DORY_SETUP_MAX_NU: usize = 8;
+const DORY_SEED: [u8; 32] = *b"ebab60d58dee4cc69658939b7c2a582d";
+
+/// Runs a single SQL query end to end against `accessor`: parse, prove, verify and
+/// post-process, printing the wall-clock time of each phase followed by the result.
+///
+/// The string `"programming_books"` is handed to `QueryExpr::try_new` as its second
+/// argument (presumably the default schema for unqualified table names — confirm
+/// against the `QueryExpr` documentation).
+///
+/// # Panics
+/// Will panic if the query does not parse or the proof fails to verify.
+fn prove_and_verify_query(
+    sql: &str,
+    accessor: &OwnedTableTestAccessor<DynamicDoryEvaluationProof>,
+    prover_setup: &ProverSetup,
+    verifier_setup: &VerifierSetup,
+) {
+    // Phase 1: build the query expression from the SQL text.
+    println!("Parsing the query: {sql}...");
+    let parse_timer = Instant::now();
+    let query = QueryExpr::<DynamicDoryCommitment>::try_new(
+        sql.parse().unwrap(),
+        "programming_books".parse().unwrap(),
+        accessor,
+    )
+    .unwrap();
+    let parse_ms = parse_timer.elapsed().as_secs_f64() * 1000.;
+    println!("Done in {parse_ms} ms.");
+
+    // Phase 2: produce the proof together with the provable result.
+    print!("Generating proof...");
+    let prove_timer = Instant::now();
+    let (proof, provable_result) = QueryProof::<DynamicDoryEvaluationProof>::new(
+        query.proof_expr(),
+        accessor,
+        &prover_setup,
+    );
+    let prove_ms = prove_timer.elapsed().as_secs_f64() * 1000.;
+    println!("Done in {prove_ms} ms.");
+
+    // Phase 3: check the proof, then apply the query's post-processing steps.
+    // The reported time covers both verification and post-processing.
+    print!("Verifying proof...");
+    let verify_timer = Instant::now();
+    let verified = proof
+        .verify(
+            query.proof_expr(),
+            accessor,
+            &provable_result,
+            &verifier_setup,
+        )
+        .unwrap();
+    let final_result = apply_postprocessing_steps(verified.table, query.postprocessing());
+    let verify_ms = verify_timer.elapsed().as_secs_f64() * 1000.;
+    println!("Verified in {verify_ms} ms.");
+
+    // Show the post-processed outcome (printed with `Debug`).
+    println!("Query Result:");
+    println!("{final_result:?}");
+}
+
+/// Entry point: creates the Dory prover/verifier setups from a fixed seed, loads
+/// `programming_books.csv` into a test accessor under the name
+/// `programming_books.books`, then proves and verifies four sample queries in order.
+///
+/// # Panics
+/// Panics if the CSV file cannot be opened, its schema cannot be inferred, no record
+/// batch can be read from it, or the batch cannot be converted into an `OwnedTable`.
+fn main() {
+    // The RNG is seeded with a constant, so the generated parameters are reproducible.
+    let mut seeded_rng = StdRng::from_seed(DORY_SEED);
+    let params = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut seeded_rng);
+    let prover_setup = ProverSetup::from(&params);
+    let verifier_setup = VerifierSetup::from(&params);
+
+    // Infer the schema from the CSV (comma-delimited, with a header row), then
+    // convert it with `get_posql_compatible_schema`.
+    let csv_path = "./crates/proof-of-sql/examples/programming_books/programming_books.csv";
+    let raw_schema = infer_schema_from_files(&[csv_path.to_string()], b',', None, true).unwrap();
+    let schema = get_posql_compatible_schema(&SchemaRef::new(raw_schema));
+
+    // Only the first record batch yielded by the reader is used.
+    let mut csv_reader = ReaderBuilder::new(schema)
+        .with_header(true)
+        .build(File::open(csv_path).unwrap())
+        .unwrap();
+    let first_batch = csv_reader.next().unwrap().unwrap();
+
+    // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments.
+    let mut accessor =
+        OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
+    accessor.add_table(
+        "programming_books.books".parse().unwrap(),
+        OwnedTable::try_from(first_batch).unwrap(),
+        0,
+    );
+
+    let queries = [
+        // Query 1: Count the total number of books
+        "SELECT COUNT(*) AS total_books FROM books",
+        // Query 2: Find books with a rating higher than 4.5
+        "SELECT title, author FROM books WHERE rating > 4.5",
+        // Query 3: List all programming books published after 2000
+        "SELECT title, publication_year FROM books WHERE genre = 'Programming' AND publication_year > 2000",
+        // Query 4: Find the top 5 authors with the most books
+        "SELECT author, COUNT(*) AS book_count FROM books GROUP BY author ORDER BY book_count DESC LIMIT 5",
+    ];
+
+    // Prove and verify each query in the order listed above.
+    for sql in queries {
+        prove_and_verify_query(sql, &accessor, &prover_setup, &verifier_setup);
+    }
+}
diff --git a/crates/proof-of-sql/examples/programming_books/programming_books.csv b/crates/proof-of-sql/examples/programming_books/programming_books.csv
@@ -0,0 +1,11 @@
+title,author,publication_year,genre,rating
+The Pragmatic Programmer,Andrew Hunt,1999,Programming,4.5
+Clean Code,Robert C. Martin,2008,Programming,4.7
+The Clean Coder,Robert C. Martin,2011,Programming,4.6
+Design Patterns,Erich Gamma,1994,Software Engineering,4.8
+Refactoring,Martin Fowler,1999,Programming,4.5
+Effective Java,Joshua Bloch,2008,Programming,4.7
+Introduction to Algorithms,Thomas H. Cormen,2009,Computer Science,4.8
+Code Complete,Steve McConnell,2004,Programming,4.6
+The Mythical Man-Month,Fred Brooks,1975,Software Engineering,4.3
+Algorithms,Robert Sedgewick,1983,Computer Science,4.5