From dee84bd32519b484c6f8f4986d1d9dfa9bad85a7 Mon Sep 17 00:00:00 2001 From: Linwei Shang Date: Tue, 3 Oct 2023 23:10:38 -0400 Subject: [PATCH 01/23] duplicate candid as candid_parser remove code under "parser" feature --- Cargo.lock | 44 +- Cargo.toml | 1 + rust/candid/src/bindings/mod.rs | 16 - rust/candid/src/error.rs | 109 +- rust/candid/src/lib.rs | 11 - rust/candid/src/types/value.rs | 22 +- rust/candid/src/utils.rs | 97 -- rust/candid_parser/Cargo.toml | 100 ++ rust/candid_parser/LICENSE | 201 +++ rust/candid_parser/README.md | 9 + rust/candid_parser/benches/benchmark.rs | 192 +++ rust/candid_parser/build.rs | 8 + rust/candid_parser/fuzz/.gitignore | 3 + rust/candid_parser/fuzz/Cargo.lock | 968 +++++++++++++++ rust/candid_parser/fuzz/Cargo.toml | 34 + .../candid_parser/fuzz/fuzz_targets/parser.rs | 13 + .../fuzz/fuzz_targets/type_decoder.rs | 44 + rust/candid_parser/src/binary_parser.rs | 274 ++++ .../src/bindings/analysis.rs | 0 rust/candid_parser/src/bindings/candid.rs | 474 +++++++ .../src/bindings/javascript.rs | 0 rust/candid_parser/src/bindings/mod.rs | 20 + .../src/bindings/motoko.rs | 0 .../src/bindings/rust.rs | 0 .../src/bindings/typescript.rs | 0 rust/candid_parser/src/de.rs | 1103 +++++++++++++++++ rust/candid_parser/src/error.rs | 229 ++++ rust/candid_parser/src/lib.rs | 381 ++++++ .../src/parser/configs.rs | 0 .../src/parser/grammar.lalrpop | 0 .../src/parser/grammar.rs | 0 .../src/parser/mod.rs | 0 .../src/parser/random.rs | 0 .../src/parser/test.rs | 0 .../src/parser/token.rs | 0 .../src/parser/types.rs | 0 .../src/parser/typing.rs | 0 rust/candid_parser/src/pretty.rs | 89 ++ rust/candid_parser/src/ser.rs | 411 ++++++ rust/candid_parser/src/types/arc.rs | 28 + rust/candid_parser/src/types/impls.rs | 538 ++++++++ rust/candid_parser/src/types/internal.rs | 588 +++++++++ rust/candid_parser/src/types/mod.rs | 94 ++ rust/candid_parser/src/types/number.rs | 634 ++++++++++ rust/candid_parser/src/types/principal.rs | 367 ++++++ 
rust/candid_parser/src/types/rc.rs | 29 + rust/candid_parser/src/types/reference.rs | 157 +++ rust/candid_parser/src/types/reserved.rs | 61 + rust/candid_parser/src/types/subtype.rs | 244 ++++ rust/candid_parser/src/types/type_env.rs | 137 ++ rust/candid_parser/src/types/value.rs | 553 +++++++++ rust/candid_parser/src/utils.rs | 367 ++++++ rust/candid_parser/tests/assets/actor.did | 11 + .../tests/assets/bad_comment.did | 9 + .../candid_parser/tests/assets/bad_import.did | 3 + rust/candid_parser/tests/assets/class.did | 5 + .../tests/assets/codegen/basic/invalid_id.did | 3 + .../tests/assets/codegen/basic/invalid_id.rs | 3 + .../tests/assets/codegen/basic/prim_types.did | 6 + .../tests/assets/codegen/basic/prim_types.rs | 6 + .../tests/assets/codegen/basic/recursive.did | 8 + .../tests/assets/codegen/basic/recursive.rs | 10 + .../assets/codegen/examples/linkedup.did | 36 + .../tests/assets/codegen/examples/linkedup.rs | 46 + .../tests/assets/collision_fields.did | 6 + .../tests/assets/collision_fields2.did | 7 + rust/candid_parser/tests/assets/comment.did | 16 + rust/candid_parser/tests/assets/cyclic.did | 11 + rust/candid_parser/tests/assets/escape.did | 5 + rust/candid_parser/tests/assets/example.did | 20 + rust/candid_parser/tests/assets/fieldnat.did | 13 + rust/candid_parser/tests/assets/import/a.did | 2 + .../candid_parser/tests/assets/import/b/b.did | 1 + .../tests/assets/invalid_cyclic.did | 5 + rust/candid_parser/tests/assets/keyword.did | 25 + .../candid_parser/tests/assets/management.did | 151 +++ rust/candid_parser/tests/assets/not_func.did | 11 + rust/candid_parser/tests/assets/not_serv.did | 6 + rust/candid_parser/tests/assets/ok/actor.d.ts | 13 + rust/candid_parser/tests/assets/ok/actor.did | 5 + rust/candid_parser/tests/assets/ok/actor.js | 14 + rust/candid_parser/tests/assets/ok/actor.mo | 15 + rust/candid_parser/tests/assets/ok/actor.rs | 29 + .../tests/assets/ok/bad_comment.fail | 1 + .../tests/assets/ok/bad_import.fail | 1 + 
rust/candid_parser/tests/assets/ok/class.d.ts | 8 + rust/candid_parser/tests/assets/ok/class.did | 2 + rust/candid_parser/tests/assets/ok/class.js | 13 + rust/candid_parser/tests/assets/ok/class.mo | 10 + rust/candid_parser/tests/assets/ok/class.rs | 20 + .../tests/assets/ok/collision_fields.fail | 1 + .../tests/assets/ok/collision_fields2.fail | 1 + .../tests/assets/ok/comment.d.ts | 5 + .../candid_parser/tests/assets/ok/comment.did | 2 + rust/candid_parser/tests/assets/ok/comment.js | 2 + rust/candid_parser/tests/assets/ok/comment.mo | 7 + rust/candid_parser/tests/assets/ok/comment.rs | 8 + .../candid_parser/tests/assets/ok/cyclic.d.ts | 10 + rust/candid_parser/tests/assets/ok/cyclic.did | 7 + rust/candid_parser/tests/assets/ok/cyclic.js | 11 + rust/candid_parser/tests/assets/ok/cyclic.mo | 12 + rust/candid_parser/tests/assets/ok/cyclic.rs | 30 + .../candid_parser/tests/assets/ok/escape.d.ts | 10 + rust/candid_parser/tests/assets/ok/escape.did | 2 + rust/candid_parser/tests/assets/ok/escape.js | 10 + rust/candid_parser/tests/assets/ok/escape.rs | 26 + .../tests/assets/ok/example.d.ts | 48 + .../candid_parser/tests/assets/ok/example.did | 37 + rust/candid_parser/tests/assets/ok/example.js | 65 + rust/candid_parser/tests/assets/ok/example.mo | 46 + rust/candid_parser/tests/assets/ok/example.rs | 92 ++ .../tests/assets/ok/fieldnat.d.ts | 14 + .../tests/assets/ok/fieldnat.did | 11 + .../candid_parser/tests/assets/ok/fieldnat.js | 34 + .../candid_parser/tests/assets/ok/fieldnat.mo | 16 + .../candid_parser/tests/assets/ok/fieldnat.rs | 63 + .../tests/assets/ok/invalid_cyclic.fail | 1 + .../tests/assets/ok/keyword.d.ts | 32 + .../candid_parser/tests/assets/ok/keyword.did | 23 + rust/candid_parser/tests/assets/ok/keyword.js | 66 + rust/candid_parser/tests/assets/ok/keyword.mo | 28 + rust/candid_parser/tests/assets/ok/keyword.rs | 92 ++ .../tests/assets/ok/management.d.ts | 172 +++ .../tests/assets/ok/management.did | 127 ++ .../tests/assets/ok/management.js | 233 ++++ 
.../tests/assets/ok/management.mo | 125 ++ .../tests/assets/ok/management.rs | 370 ++++++ .../tests/assets/ok/not_func.fail | 1 + .../tests/assets/ok/not_serv.fail | 1 + .../candid_parser/tests/assets/ok/oneway.fail | 1 + .../tests/assets/ok/recursion.d.ts | 15 + .../tests/assets/ok/recursion.did | 12 + .../tests/assets/ok/recursion.js | 31 + .../tests/assets/ok/recursion.mo | 17 + .../tests/assets/ok/recursion.rs | 47 + .../tests/assets/ok/recursive_class.d.ts | 5 + .../tests/assets/ok/recursive_class.did | 2 + .../tests/assets/ok/recursive_class.js | 10 + .../tests/assets/ok/recursive_class.mo | 7 + .../tests/assets/ok/recursive_class.rs | 15 + .../tests/assets/ok/service.d.ts | 19 + .../candid_parser/tests/assets/ok/service.did | 9 + rust/candid_parser/tests/assets/ok/service.js | 26 + rust/candid_parser/tests/assets/ok/service.mo | 14 + rust/candid_parser/tests/assets/ok/service.rs | 34 + .../tests/assets/ok/surrogate.fail | 1 + .../tests/assets/ok/undefine.fail | 1 + .../tests/assets/ok/unicode.d.ts | 19 + .../candid_parser/tests/assets/ok/unicode.did | 13 + rust/candid_parser/tests/assets/ok/unicode.js | 21 + rust/candid_parser/tests/assets/ok/unicode.rs | 47 + rust/candid_parser/tests/assets/oneway.did | 3 + rust/candid_parser/tests/assets/recursion.did | 13 + .../tests/assets/recursive_class.did | 6 + rust/candid_parser/tests/assets/service.did | 9 + rust/candid_parser/tests/assets/surrogate.did | 3 + rust/candid_parser/tests/assets/undefine.did | 4 + rust/candid_parser/tests/assets/unicode.did | 16 + rust/candid_parser/tests/number.rs | 135 ++ .../tests/parse_type.rs | 10 +- .../tests/parse_value.rs | 10 +- rust/candid_parser/tests/principal.rs | 273 ++++ rust/candid_parser/tests/serde.rs | 783 ++++++++++++ .../tests/test_suite.rs | 4 +- rust/candid_parser/tests/types.rs | 299 +++++ rust/{candid => candid_parser}/tests/value.rs | 8 +- tools/didc/Cargo.toml | 2 +- 167 files changed, 12720 insertions(+), 271 deletions(-) create mode 100644 
rust/candid_parser/Cargo.toml create mode 100644 rust/candid_parser/LICENSE create mode 100644 rust/candid_parser/README.md create mode 100644 rust/candid_parser/benches/benchmark.rs create mode 100644 rust/candid_parser/build.rs create mode 100644 rust/candid_parser/fuzz/.gitignore create mode 100644 rust/candid_parser/fuzz/Cargo.lock create mode 100644 rust/candid_parser/fuzz/Cargo.toml create mode 100644 rust/candid_parser/fuzz/fuzz_targets/parser.rs create mode 100644 rust/candid_parser/fuzz/fuzz_targets/type_decoder.rs create mode 100644 rust/candid_parser/src/binary_parser.rs rename rust/{candid => candid_parser}/src/bindings/analysis.rs (100%) create mode 100644 rust/candid_parser/src/bindings/candid.rs rename rust/{candid => candid_parser}/src/bindings/javascript.rs (100%) create mode 100644 rust/candid_parser/src/bindings/mod.rs rename rust/{candid => candid_parser}/src/bindings/motoko.rs (100%) rename rust/{candid => candid_parser}/src/bindings/rust.rs (100%) rename rust/{candid => candid_parser}/src/bindings/typescript.rs (100%) create mode 100644 rust/candid_parser/src/de.rs create mode 100644 rust/candid_parser/src/error.rs create mode 100644 rust/candid_parser/src/lib.rs rename rust/{candid => candid_parser}/src/parser/configs.rs (100%) rename rust/{candid => candid_parser}/src/parser/grammar.lalrpop (100%) rename rust/{candid => candid_parser}/src/parser/grammar.rs (100%) rename rust/{candid => candid_parser}/src/parser/mod.rs (100%) rename rust/{candid => candid_parser}/src/parser/random.rs (100%) rename rust/{candid => candid_parser}/src/parser/test.rs (100%) rename rust/{candid => candid_parser}/src/parser/token.rs (100%) rename rust/{candid => candid_parser}/src/parser/types.rs (100%) rename rust/{candid => candid_parser}/src/parser/typing.rs (100%) create mode 100644 rust/candid_parser/src/pretty.rs create mode 100644 rust/candid_parser/src/ser.rs create mode 100644 rust/candid_parser/src/types/arc.rs create mode 100644 
rust/candid_parser/src/types/impls.rs create mode 100644 rust/candid_parser/src/types/internal.rs create mode 100644 rust/candid_parser/src/types/mod.rs create mode 100644 rust/candid_parser/src/types/number.rs create mode 100644 rust/candid_parser/src/types/principal.rs create mode 100644 rust/candid_parser/src/types/rc.rs create mode 100644 rust/candid_parser/src/types/reference.rs create mode 100644 rust/candid_parser/src/types/reserved.rs create mode 100644 rust/candid_parser/src/types/subtype.rs create mode 100644 rust/candid_parser/src/types/type_env.rs create mode 100644 rust/candid_parser/src/types/value.rs create mode 100644 rust/candid_parser/src/utils.rs create mode 100644 rust/candid_parser/tests/assets/actor.did create mode 100644 rust/candid_parser/tests/assets/bad_comment.did create mode 100644 rust/candid_parser/tests/assets/bad_import.did create mode 100644 rust/candid_parser/tests/assets/class.did create mode 100644 rust/candid_parser/tests/assets/codegen/basic/invalid_id.did create mode 100644 rust/candid_parser/tests/assets/codegen/basic/invalid_id.rs create mode 100644 rust/candid_parser/tests/assets/codegen/basic/prim_types.did create mode 100644 rust/candid_parser/tests/assets/codegen/basic/prim_types.rs create mode 100644 rust/candid_parser/tests/assets/codegen/basic/recursive.did create mode 100644 rust/candid_parser/tests/assets/codegen/basic/recursive.rs create mode 100644 rust/candid_parser/tests/assets/codegen/examples/linkedup.did create mode 100644 rust/candid_parser/tests/assets/codegen/examples/linkedup.rs create mode 100644 rust/candid_parser/tests/assets/collision_fields.did create mode 100644 rust/candid_parser/tests/assets/collision_fields2.did create mode 100644 rust/candid_parser/tests/assets/comment.did create mode 100644 rust/candid_parser/tests/assets/cyclic.did create mode 100644 rust/candid_parser/tests/assets/escape.did create mode 100644 rust/candid_parser/tests/assets/example.did create mode 100644 
rust/candid_parser/tests/assets/fieldnat.did create mode 100644 rust/candid_parser/tests/assets/import/a.did create mode 100644 rust/candid_parser/tests/assets/import/b/b.did create mode 100644 rust/candid_parser/tests/assets/invalid_cyclic.did create mode 100644 rust/candid_parser/tests/assets/keyword.did create mode 100644 rust/candid_parser/tests/assets/management.did create mode 100644 rust/candid_parser/tests/assets/not_func.did create mode 100644 rust/candid_parser/tests/assets/not_serv.did create mode 100644 rust/candid_parser/tests/assets/ok/actor.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/actor.did create mode 100644 rust/candid_parser/tests/assets/ok/actor.js create mode 100644 rust/candid_parser/tests/assets/ok/actor.mo create mode 100644 rust/candid_parser/tests/assets/ok/actor.rs create mode 100644 rust/candid_parser/tests/assets/ok/bad_comment.fail create mode 100644 rust/candid_parser/tests/assets/ok/bad_import.fail create mode 100644 rust/candid_parser/tests/assets/ok/class.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/class.did create mode 100644 rust/candid_parser/tests/assets/ok/class.js create mode 100644 rust/candid_parser/tests/assets/ok/class.mo create mode 100644 rust/candid_parser/tests/assets/ok/class.rs create mode 100644 rust/candid_parser/tests/assets/ok/collision_fields.fail create mode 100644 rust/candid_parser/tests/assets/ok/collision_fields2.fail create mode 100644 rust/candid_parser/tests/assets/ok/comment.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/comment.did create mode 100644 rust/candid_parser/tests/assets/ok/comment.js create mode 100644 rust/candid_parser/tests/assets/ok/comment.mo create mode 100644 rust/candid_parser/tests/assets/ok/comment.rs create mode 100644 rust/candid_parser/tests/assets/ok/cyclic.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/cyclic.did create mode 100644 rust/candid_parser/tests/assets/ok/cyclic.js create mode 100644 
rust/candid_parser/tests/assets/ok/cyclic.mo create mode 100644 rust/candid_parser/tests/assets/ok/cyclic.rs create mode 100644 rust/candid_parser/tests/assets/ok/escape.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/escape.did create mode 100644 rust/candid_parser/tests/assets/ok/escape.js create mode 100644 rust/candid_parser/tests/assets/ok/escape.rs create mode 100644 rust/candid_parser/tests/assets/ok/example.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/example.did create mode 100644 rust/candid_parser/tests/assets/ok/example.js create mode 100644 rust/candid_parser/tests/assets/ok/example.mo create mode 100644 rust/candid_parser/tests/assets/ok/example.rs create mode 100644 rust/candid_parser/tests/assets/ok/fieldnat.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/fieldnat.did create mode 100644 rust/candid_parser/tests/assets/ok/fieldnat.js create mode 100644 rust/candid_parser/tests/assets/ok/fieldnat.mo create mode 100644 rust/candid_parser/tests/assets/ok/fieldnat.rs create mode 100644 rust/candid_parser/tests/assets/ok/invalid_cyclic.fail create mode 100644 rust/candid_parser/tests/assets/ok/keyword.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/keyword.did create mode 100644 rust/candid_parser/tests/assets/ok/keyword.js create mode 100644 rust/candid_parser/tests/assets/ok/keyword.mo create mode 100644 rust/candid_parser/tests/assets/ok/keyword.rs create mode 100644 rust/candid_parser/tests/assets/ok/management.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/management.did create mode 100644 rust/candid_parser/tests/assets/ok/management.js create mode 100644 rust/candid_parser/tests/assets/ok/management.mo create mode 100644 rust/candid_parser/tests/assets/ok/management.rs create mode 100644 rust/candid_parser/tests/assets/ok/not_func.fail create mode 100644 rust/candid_parser/tests/assets/ok/not_serv.fail create mode 100644 rust/candid_parser/tests/assets/ok/oneway.fail create mode 100644 
rust/candid_parser/tests/assets/ok/recursion.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/recursion.did create mode 100644 rust/candid_parser/tests/assets/ok/recursion.js create mode 100644 rust/candid_parser/tests/assets/ok/recursion.mo create mode 100644 rust/candid_parser/tests/assets/ok/recursion.rs create mode 100644 rust/candid_parser/tests/assets/ok/recursive_class.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/recursive_class.did create mode 100644 rust/candid_parser/tests/assets/ok/recursive_class.js create mode 100644 rust/candid_parser/tests/assets/ok/recursive_class.mo create mode 100644 rust/candid_parser/tests/assets/ok/recursive_class.rs create mode 100644 rust/candid_parser/tests/assets/ok/service.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/service.did create mode 100644 rust/candid_parser/tests/assets/ok/service.js create mode 100644 rust/candid_parser/tests/assets/ok/service.mo create mode 100644 rust/candid_parser/tests/assets/ok/service.rs create mode 100644 rust/candid_parser/tests/assets/ok/surrogate.fail create mode 100644 rust/candid_parser/tests/assets/ok/undefine.fail create mode 100644 rust/candid_parser/tests/assets/ok/unicode.d.ts create mode 100644 rust/candid_parser/tests/assets/ok/unicode.did create mode 100644 rust/candid_parser/tests/assets/ok/unicode.js create mode 100644 rust/candid_parser/tests/assets/ok/unicode.rs create mode 100644 rust/candid_parser/tests/assets/oneway.did create mode 100644 rust/candid_parser/tests/assets/recursion.did create mode 100644 rust/candid_parser/tests/assets/recursive_class.did create mode 100644 rust/candid_parser/tests/assets/service.did create mode 100644 rust/candid_parser/tests/assets/surrogate.did create mode 100644 rust/candid_parser/tests/assets/undefine.did create mode 100644 rust/candid_parser/tests/assets/unicode.did create mode 100644 rust/candid_parser/tests/number.rs rename rust/{candid => candid_parser}/tests/parse_type.rs (91%) rename 
rust/{candid => candid_parser}/tests/parse_value.rs (96%) create mode 100644 rust/candid_parser/tests/principal.rs create mode 100644 rust/candid_parser/tests/serde.rs rename rust/{candid => candid_parser}/tests/test_suite.rs (85%) create mode 100644 rust/candid_parser/tests/types.rs rename rust/{candid => candid_parser}/tests/value.rs (95%) diff --git a/Cargo.lock b/Cargo.lock index 3e9c4bf4..33b10cad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -306,6 +306,48 @@ dependencies = [ "syn 2.0.37", ] +[[package]] +name = "candid_parser" +version = "0.1.0" +dependencies = [ + "anyhow", + "arbitrary", + "bincode", + "binread", + "byteorder", + "candid", + "candid_derive", + "codespan-reporting", + "convert_case", + "crc32fast", + "criterion", + "data-encoding", + "fake", + "goldenfile", + "hex", + "impls", + "lalrpop", + "lalrpop-util", + "leb128", + "logos", + "num-bigint", + "num-traits", + "num_enum", + "paste", + "pretty 0.12.3", + "rand", + "serde", + "serde_bytes", + "serde_cbor", + "serde_dhall", + "serde_json", + "serde_test", + "sha2 0.10.8", + "stacker", + "test-generator", + "thiserror", +] + [[package]] name = "cast" version = "0.3.0" @@ -602,7 +644,7 @@ name = "didc" version = "0.3.5" dependencies = [ "anyhow", - "candid", + "candid_parser", "clap 4.4.5", "hex", "pretty-hex", diff --git a/Cargo.toml b/Cargo.toml index 352d28c5..f7a86b05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "rust/candid", + "rust/candid_parser", "rust/candid_derive", "tools/didc", ] diff --git a/rust/candid/src/bindings/mod.rs b/rust/candid/src/bindings/mod.rs index 89d86ab3..51e6868b 100644 --- a/rust/candid/src/bindings/mod.rs +++ b/rust/candid/src/bindings/mod.rs @@ -2,19 +2,3 @@ // This module assumes the input are type checked, it is safe to use unwrap. 
pub mod candid; - -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -pub mod analysis; -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -pub mod javascript; -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -pub mod motoko; -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -pub mod rust; -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -pub mod typescript; diff --git a/rust/candid/src/error.rs b/rust/candid/src/error.rs index 9181d4a0..adae0cda 100644 --- a/rust/candid/src/error.rs +++ b/rust/candid/src/error.rs @@ -5,24 +5,10 @@ use serde::{de, ser}; use std::io; use thiserror::Error; -#[cfg(feature = "parser")] -use crate::parser::token; -#[cfg(feature = "parser")] -use codespan_reporting::{ - diagnostic::Diagnostic, - files::{Error as ReportError, SimpleFile}, - term::{self, termcolor::StandardStream}, -}; - pub type Result = std::result::Result; #[derive(Debug, Error)] pub enum Error { - #[cfg_attr(docsrs, doc(cfg(feature = "parser")))] - #[cfg(feature = "parser")] - #[error("Candid parser error: {0}")] - Parse(#[from] token::ParserError), - #[error("binary parser error: {}", .0.get(0).map(|f| format!("{} at byte offset {}", f.message, f.range.start/2)).unwrap_or_else(|| "io error".to_string()))] Binread(Vec>), @@ -40,42 +26,6 @@ impl Error { pub fn subtype(msg: T) -> Self { Error::Subtype(msg.to_string()) } - #[cfg_attr(docsrs, doc(cfg(feature = "parser")))] - #[cfg(feature = "parser")] - pub fn report(&self) -> Diagnostic<()> { - match self { - Error::Parse(e) => { - use lalrpop_util::ParseError::*; - let mut diag = Diagnostic::error().with_message("parser error"); - let label = match e { - User { error } => { - Label::primary((), error.span.clone()).with_message(&error.err) - } - InvalidToken { location } => { - Label::primary((), *location..location + 1).with_message("Invalid token") - } - UnrecognizedEof { 
location, expected } => { - diag = diag.with_notes(report_expected(expected)); - Label::primary((), *location..location + 1).with_message("Unexpected EOF") - } - UnrecognizedToken { token, expected } => { - diag = diag.with_notes(report_expected(expected)); - Label::primary((), token.0..token.2).with_message("Unexpected token") - } - ExtraToken { token } => { - Label::primary((), token.0..token.2).with_message("Extra token") - } - }; - diag.with_labels(vec![label]) - } - Error::Binread(labels) => { - let diag = Diagnostic::error().with_message("decoding error"); - diag.with_labels(labels.to_vec()) - } - Error::Subtype(e) => Diagnostic::error().with_message(e), - Error::Custom(e) => Diagnostic::error().with_message(e.to_string()), - } - } } fn get_binread_labels(e: &binread::Error) -> Vec> { @@ -123,26 +73,6 @@ fn get_binread_labels(e: &binread::Error) -> Vec> { } } -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -fn report_expected(expected: &[String]) -> Vec { - if expected.is_empty() { - return Vec::new(); - } - use pretty::RcDoc; - let doc: RcDoc<()> = RcDoc::intersperse( - expected.iter().map(RcDoc::text), - RcDoc::text(",").append(RcDoc::softline()), - ); - let header = if expected.len() == 1 { - "Expects" - } else { - "Expects one of" - }; - let doc = RcDoc::text(header).append(RcDoc::softline().append(doc)); - vec![doc.pretty(70).to_string()] -} - impl ser::Error for Error { fn custom(msg: T) -> Self { Error::msg(format!("Serialize error: {msg}")) @@ -174,13 +104,7 @@ impl From for Error { Error::Binread(get_binread_labels(&e)) } } -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -impl From for Error { - fn from(e: ReportError) -> Error { - Error::msg(e) - } -} + #[cfg_attr(docsrs, doc(cfg(feature = "random")))] #[cfg(feature = "random")] impl From for Error { @@ -196,34 +120,3 @@ impl From for Error { Error::msg(format!("dhall error: {e}")) } } - -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] 
-#[cfg(feature = "parser")] -pub fn pretty_parse(name: &str, str: &str) -> Result -where - T: std::str::FromStr, -{ - str.parse::().or_else(|e| { - let writer = StandardStream::stderr(term::termcolor::ColorChoice::Auto); - let config = term::Config::default(); - let file = SimpleFile::new(name, str); - term::emit(&mut writer.lock(), &config, &file, &e.report())?; - Err(e) - }) -} -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -pub fn pretty_read(reader: &mut std::io::Cursor<&[u8]>) -> Result -where - T: binread::BinRead, -{ - T::read(reader).or_else(|e| { - let e = Error::from(e); - let writer = StandardStream::stderr(term::termcolor::ColorChoice::Auto); - let config = term::Config::default(); - let str = hex::encode(reader.get_ref()); - let file = SimpleFile::new("binary", &str); - term::emit(&mut writer.lock(), &config, &file, &e.report())?; - Err(e) - }) -} diff --git a/rust/candid/src/lib.rs b/rust/candid/src/lib.rs index 446735af..6a9c5dc2 100644 --- a/rust/candid/src/lib.rs +++ b/rust/candid/src/lib.rs @@ -353,17 +353,6 @@ pub mod utils; pub use utils::{decode_args, decode_one, encode_args, encode_one, write_args}; pub mod pretty; -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -pub mod parser; -#[cfg(feature = "parser")] -pub use error::{pretty_parse, pretty_read}; -#[cfg(feature = "parser")] -pub use parser::{ - types::IDLProg, - typing::{check_file, check_prog, pretty_check_file}, -}; - pub mod bindings; // Candid hash function comes from diff --git a/rust/candid/src/types/value.rs b/rust/candid/src/types/value.rs index 89ac3cf5..94771d92 100644 --- a/rust/candid/src/types/value.rs +++ b/rust/candid/src/types/value.rs @@ -239,27 +239,7 @@ impl IDLValue { (IDLValue::Principal(id), TypeInner::Principal) => IDLValue::Principal(*id), (IDLValue::Service(_), TypeInner::Service(_)) => self.clone(), (IDLValue::Func(_, _), TypeInner::Func(_)) => self.clone(), - #[cfg(feature = "parser")] - 
(IDLValue::Number(str), _) if from_parser => { - use crate::parser::token::error; - match t.as_ref() { - TypeInner::Int => IDLValue::Int(str.parse::()?), - TypeInner::Nat => IDLValue::Nat(str.parse::()?), - TypeInner::Nat8 => IDLValue::Nat8(str.parse::().map_err(error)?), - TypeInner::Nat16 => IDLValue::Nat16(str.parse::().map_err(error)?), - TypeInner::Nat32 => IDLValue::Nat32(str.parse::().map_err(error)?), - TypeInner::Nat64 => IDLValue::Nat64(str.parse::().map_err(error)?), - TypeInner::Int8 => IDLValue::Int8(str.parse::().map_err(error)?), - TypeInner::Int16 => IDLValue::Int16(str.parse::().map_err(error)?), - TypeInner::Int32 => IDLValue::Int32(str.parse::().map_err(error)?), - TypeInner::Int64 => IDLValue::Int64(str.parse::().map_err(error)?), - _ => { - return Err(Error::msg(format!( - "type mismatch: {self} can not be of type {t}" - ))) - } - } - } + _ => { return Err(Error::msg(format!( "type mismatch: {self} cannot be of type {t}" diff --git a/rust/candid/src/utils.rs b/rust/candid/src/utils.rs index 64bee0a2..be57f900 100644 --- a/rust/candid/src/utils.rs +++ b/rust/candid/src/utils.rs @@ -3,13 +3,6 @@ use crate::ser::IDLBuilder; use crate::{CandidType, Error, Result}; use serde::de::Deserialize; -#[cfg(feature = "parser")] -use crate::{check_prog, pretty_check_file}; -#[cfg(feature = "parser")] -use crate::{pretty_parse, types::Type, TypeEnv}; -#[cfg(feature = "parser")] -use std::path::Path; - pub fn check_unique<'a, I, T>(sorted: I) -> Result<()> where T: 'a + PartialEq + std::fmt::Display, @@ -29,96 +22,6 @@ where Ok(()) } -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -pub enum CandidSource<'a> { - File(&'a Path), - Text(&'a str), -} -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -impl<'a> CandidSource<'a> { - pub fn load(&self) -> Result<(TypeEnv, Option)> { - Ok(match self { - CandidSource::File(path) => pretty_check_file(path)?, - CandidSource::Text(str) => { - let ast = 
pretty_parse("", str)?; - let mut env = TypeEnv::new(); - let actor = check_prog(&mut env, &ast)?; - (env, actor) - } - }) - } -} - -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -/// Check compatibility of two service types -pub fn service_compatible(new: CandidSource, old: CandidSource) -> Result<()> { - let (mut env, t1) = new.load()?; - let t1 = t1.ok_or_else(|| Error::msg("new interface has no main service type"))?; - let (env2, t2) = old.load()?; - let t2 = t2.ok_or_else(|| Error::msg("old interface has no main service type"))?; - let mut gamma = std::collections::HashSet::new(); - let t2 = env.merge_type(env2, t2); - crate::types::subtype::subtype(&mut gamma, &env, &t1, &t2)?; - Ok(()) -} - -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -/// Check structural equality of two service types -pub fn service_equal(left: CandidSource, right: CandidSource) -> Result<()> { - let (mut env, t1) = left.load()?; - let t1 = t1.ok_or_else(|| Error::msg("left interface has no main service type"))?; - let (env2, t2) = right.load()?; - let t2 = t2.ok_or_else(|| Error::msg("right interface has no main service type"))?; - let mut gamma = std::collections::HashSet::new(); - let t2 = env.merge_type(env2, t2); - crate::types::subtype::equal(&mut gamma, &env, &t1, &t2)?; - Ok(()) -} - -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -/// Take a did file and outputs the init args and the service type (without init args). -/// If the original did file contains imports, the output flattens the type definitions. -/// For now, the comments from the original did file is omitted. 
-pub fn instantiate_candid(candid: CandidSource) -> Result<(Vec, (TypeEnv, Type))> { - use crate::types::TypeInner; - let (env, serv) = candid.load()?; - let serv = serv.ok_or_else(|| Error::msg("the Candid interface has no main service type"))?; - let serv = env.trace_type(&serv)?; - Ok(match serv.as_ref() { - TypeInner::Class(args, ty) => (args.clone(), (env, ty.clone())), - TypeInner::Service(_) => (vec![], (env, serv)), - _ => unreachable!(), - }) -} - -/// Merge canister metadata candid:args and candid:service into a service constructor. -/// If candid:service already contains init args, returns the original did file. -#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] -#[cfg(feature = "parser")] -pub fn merge_init_args(candid: &str, init: &str) -> Result<(TypeEnv, Type)> { - use crate::parser::{types::IDLInitArgs, typing::check_init_args}; - use crate::types::TypeInner; - let candid = CandidSource::Text(candid); - let (env, serv) = candid.load()?; - let serv = serv.ok_or_else(|| Error::msg("the Candid interface has no main service type"))?; - let serv = env.trace_type(&serv)?; - match serv.as_ref() { - TypeInner::Class(_, _) => Ok((env, serv)), - TypeInner::Service(_) => { - let prog = init.parse::()?; - let mut env2 = TypeEnv::new(); - let args = check_init_args(&mut env2, &env, &prog)?; - Ok((env2, TypeInner::Class(args, serv).into())) - } - _ => unreachable!(), - } -} - /// Encode sequence of Rust values into Candid message of type `candid::Result>`. #[macro_export] macro_rules! Encode { diff --git a/rust/candid_parser/Cargo.toml b/rust/candid_parser/Cargo.toml new file mode 100644 index 00000000..8139bafb --- /dev/null +++ b/rust/candid_parser/Cargo.toml @@ -0,0 +1,100 @@ +[package] +name = "candid_parser" +version = "0.1.0" +edition = "2021" +authors = ["DFINITY Team"] +description = "Candid is an interface description language (IDL) for interacting with canisters running on the Internet Computer." 
+homepage = "https://internetcomputer.org/docs/current/developer-docs/build/candid/candid-concepts" +documentation = "https://docs.rs/candid" +repository = "https://github.com/dfinity/candid" +license = "Apache-2.0" +readme = "README.md" + +categories = ["encoding", "parsing", "wasm"] +keywords = ["internet-computer", "idl", "candid", "dfinity", "parser"] +include = ["src", "Cargo.toml", "build.rs", "LICENSE", "README.md"] +build = "build.rs" + +[build-dependencies] +lalrpop = { version = "0.20.0", optional = true } + +[dependencies] +candid.path = "../candid" +byteorder = "1.4.3" +candid_derive = { path = "../candid_derive", version = "=0.6.3" } +codespan-reporting = "0.11" +crc32fast = "1.3.0" +data-encoding = "2.4.0" +hex = "0.4.2" +leb128 = "0.2.4" +num_enum = "0.6.1" +num-bigint = { version = "0.4.2", features = ["serde"] } +num-traits = "0.2.12" +paste = "1.0.0" +pretty = "0.12.0" +serde = { version = "1.0.118", features = ["derive"] } +serde_bytes = "0.11" +sha2 = "0.10.1" +thiserror = "1.0.20" +anyhow = "1.0" +binread = { version = "2.1", features = ["debug_template"] } + +lalrpop-util = { version = "0.20.0", optional = true } +logos = { version = "0.13", optional = true } +convert_case = { version = "0.6", optional = true } + +arbitrary = { version = "1.0", optional = true } +# Don't upgrade serde_dhall. It will introduce dependency with invalid license. 
+serde_dhall = { version = "0.11", default-features = false, optional = true } +fake = { version = "2.4", optional = true } +rand = { version = "0.8", optional = true } + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +stacker = "0.1" + +[dev-dependencies] +goldenfile = "1.1.0" +test-generator = "0.3.0" +rand = "0.8" +criterion = "0.4" +serde_cbor = "0.11.2" +serde_json = "1.0.74" +serde_test = "1.0.137" +impls = "1" +bincode = "1.3.3" + +[[bench]] +name = "benchmark" +harness = false +path = "benches/benchmark.rs" + +[[test]] +name = "test_suite" +path = "tests/test_suite.rs" +required-features = ["parser"] +[[test]] +name = "value" +path = "tests/value.rs" +required-features = ["parser"] +[[test]] +name = "parse_value" +path = "tests/parse_value.rs" +required-features = ["parser"] +[[test]] +name = "parse_type" +path = "tests/parse_type.rs" +required-features = ["parser"] + +[features] +configs = ["serde_dhall"] +random = ["parser", "configs", "arbitrary", "fake", "rand"] +parser = ["lalrpop", "lalrpop-util", "logos", "convert_case"] +all = ["random"] +mute_warnings = [] + +# docs.rs-specific configuration +# To test locally: RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --features all +[package.metadata.docs.rs] +features = ["all"] +# defines the configuration attribute `docsrs` +rustdoc-args = ["--cfg", "docsrs"] diff --git a/rust/candid_parser/LICENSE b/rust/candid_parser/LICENSE new file mode 100644 index 00000000..b27ba1fe --- /dev/null +++ b/rust/candid_parser/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 DFINITY LLC. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/rust/candid_parser/README.md b/rust/candid_parser/README.md new file mode 100644 index 00000000..f16e8ece --- /dev/null +++ b/rust/candid_parser/README.md @@ -0,0 +1,9 @@ +# Candid + +[Candid](https://github.com/dfinity/candid/tree/master/spec/Candid.md) is an interface description language (IDL) for interacting with _canisters_ (also known as _services_ or _actors_) running on the Internet Computer. + +The Candid crate is a serialization/deserialization library for Candid. You can seamlessly convert between Rust values and Candid in both binary and text format. + +# Usage + +See [the docs here](https://docs.rs/candid). 
diff --git a/rust/candid_parser/benches/benchmark.rs b/rust/candid_parser/benches/benchmark.rs new file mode 100644 index 00000000..14deb618 --- /dev/null +++ b/rust/candid_parser/benches/benchmark.rs @@ -0,0 +1,192 @@ +use candid::{CandidType, Decode, Deserialize, Encode, Int, Nat, Principal}; +use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; +use std::collections::BTreeMap; + +fn bench_blob(c: &mut Criterion) { + use serde_bytes::{ByteBuf, Bytes}; + let vec: Vec = vec![0x61; 524288]; + let mut group = c.benchmark_group("Blob"); + group.bench_function("ByteBuf", |b| { + b.iter_batched( + || vec.clone(), + |vec| { + let bytes = Encode!(&ByteBuf::from(vec)).unwrap(); + Decode!(&bytes, ByteBuf).unwrap(); + }, + BatchSize::SmallInput, + ) + }); + group.bench_function("Bytes", |b| { + b.iter_batched_ref( + || vec.clone(), + |vec| { + let bytes = Encode!(&Bytes::new(vec)).unwrap(); + Decode!(&bytes, &Bytes).unwrap(); + }, + BatchSize::SmallInput, + ) + }); + let text = String::from_utf8(vec).unwrap(); + group.bench_function("String", |b| { + b.iter_batched( + || text.clone(), + |text| { + let bytes = Encode!(&text).unwrap(); + Decode!(&bytes, String).unwrap(); + }, + BatchSize::SmallInput, + ) + }); + group.bench_function("&str", |b| { + b.iter_batched_ref( + || text.clone(), + |text| { + let bytes = Encode!(text).unwrap(); + Decode!(&bytes, &str).unwrap(); + }, + BatchSize::SmallInput, + ) + }); + group.finish(); +} + +fn bench_collections(c: &mut Criterion) { + let mut group = c.benchmark_group("Collections"); + { + let vec8: Vec = vec![0x61; 524288]; + group.bench_function("vec nat8", |b| { + b.iter_batched( + || vec8.clone(), + |vec| { + let bytes = Encode!(&vec).unwrap(); + Decode!(&bytes, Vec).unwrap(); + }, + BatchSize::SmallInput, + ) + }); + } + { + let vec64: Vec = vec![-1; 524288]; + group.bench_function("vec int64", |b| { + b.iter_batched( + || vec64.clone(), + |vec| { + let bytes = Encode!(&vec).unwrap(); + 
Decode!(&bytes, Vec).unwrap(); + }, + BatchSize::SmallInput, + ) + }); + } + { + let vec: Vec = vec![Int::from(-1); 65536]; + group.bench_function("vec int", |b| { + b.iter_batched( + || vec.clone(), + |vec| { + let bytes = Encode!(&vec).unwrap(); + Decode!(&bytes, Vec).unwrap(); + }, + BatchSize::SmallInput, + ) + }); + } + { + let map: BTreeMap = + (0..65536).map(|i| (i.to_string(), Nat::from(i))).collect(); + group.bench_function("vec (text, nat)", |b| { + b.iter_batched( + || map.clone(), + |map| { + let bytes = Encode!(&map).unwrap(); + Decode!(&bytes, BTreeMap).unwrap(); + }, + BatchSize::SmallInput, + ) + }); + } +} + +fn bench_recursion(c: &mut Criterion) { + let n = 1024; + #[derive(CandidType, Deserialize, Clone)] + struct List { + head: Int, + tail: Option>, + } + #[derive(CandidType, Deserialize, Clone)] + enum VariantList { + Nil, + Cons(Int, Box), + } + { + let list: Option> = (0..n).fold(None, |acc, x| { + Some(Box::new(List { + head: Int::from(x), + tail: acc, + })) + }); + c.bench_with_input(BenchmarkId::new("option list", n), &list, |b, list| { + b.iter(|| { + let bytes = Encode!(list).unwrap(); + Decode!(&bytes, Option>).unwrap() + }) + }); + } + { + let list: VariantList = (0..n).fold(VariantList::Nil, |acc, x| { + VariantList::Cons(Int::from(x), Box::new(acc)) + }); + c.bench_with_input(BenchmarkId::new("variant list", n), &list, |b, list| { + b.iter(|| { + let bytes = Encode!(list).unwrap(); + Decode!(&bytes, VariantList).unwrap() + }) + }); + } +} + +fn bench_profile(c: &mut Criterion) { + #[derive(CandidType, Deserialize, Clone)] + #[allow(non_snake_case)] + struct Profile { + id: Principal, + imgUrl: String, + title: String, + education: String, + experience: String, + company: String, + lastName: String, + firstName: String, + } + let profile = Profile { + id: Principal::from_text("27u75-h7div-y6axr-knc2i-3bsij-dr5wo-jdb5t-ndd2n-mh22v-ooz2s-iqe").unwrap(), + firstName: "Dominic".to_string(), + lastName: "Williams".to_string(), + title: 
"Founder & Chief Scientist".to_string(), + company: "DFINITY".to_string(), + experience: "**President & Chief Scientist**, DFINITY \nJan 2015 – Present \nPalo Alto, CA\n\n**President & CTO**, String Labs, Inc \nJun 2015 – Feb 2018 \nPalo Alto, CA".to_string(), + education: "**King's College London** \nBA, Computer Science".to_string(), + imgUrl: "https://media-exp1.licdn.com/dms/image/C5603AQHdxGV6zMbg-A/profile-displayphoto-shrink_200_200/0?e=1592438400&v=beta&t=NlR0J9mgJXd3SO6K3YJ6xBC_wCip20u5THPNKu6ImYQ".to_string(), + }; + let profiles: Vec<_> = std::iter::repeat(profile).take(1024).collect(); + c.bench_with_input( + BenchmarkId::new("profiles", profiles.len()), + &profiles, + |b, vec| { + b.iter(|| { + let bytes = Encode!(vec).unwrap(); + Decode!(&bytes, Vec).unwrap() + }) + }, + ); +} + +criterion_group!( + benches, + bench_blob, + bench_collections, + bench_profile, + bench_recursion +); +criterion_main!(benches); diff --git a/rust/candid_parser/build.rs b/rust/candid_parser/build.rs new file mode 100644 index 00000000..e6396a04 --- /dev/null +++ b/rust/candid_parser/build.rs @@ -0,0 +1,8 @@ +fn main() { + #[cfg(feature = "parser")] + lalrpop::Configuration::new() + .use_cargo_dir_conventions() + .emit_rerun_directives(true) + .process_file("src/parser/grammar.lalrpop") + .unwrap(); +} diff --git a/rust/candid_parser/fuzz/.gitignore b/rust/candid_parser/fuzz/.gitignore new file mode 100644 index 00000000..a0925114 --- /dev/null +++ b/rust/candid_parser/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/rust/candid_parser/fuzz/Cargo.lock b/rust/candid_parser/fuzz/Cargo.lock new file mode 100644 index 00000000..a908c669 --- /dev/null +++ b/rust/candid_parser/fuzz/Cargo.lock @@ -0,0 +1,968 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" + +[[package]] +name = "arbitrary" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e90af4de65aa7b293ef2d09daff88501eb254f58edde2e1ac02c82d873eadad" + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + +[[package]] +name = "ascii-canvas" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" +dependencies = [ + "term", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + +[[package]] +name = "binread" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16598dfc8e6578e9b597d9910ba2e73618385dc9f4b1d43dd92c349d6be6418f" +dependencies = [ + "binread_derive", + "lazy_static", + "rustversion", +] + 
+[[package]] +name = "binread_derive" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d9672209df1714ee804b1f4d4f68c8eb2a90b1f7a07acf472f88ce198ef1fed" +dependencies = [ + "either", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "block-buffer" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" +dependencies = [ + "generic-array", +] + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "candid" +version = "0.9.0-beta.2" +dependencies = [ + "anyhow", + "binread", + "byteorder", + "candid_derive", + "codespan-reporting", + "crc32fast", + "data-encoding", + "hex", + "lalrpop", + "lalrpop-util", + "leb128", + "logos", + "num-bigint", + "num-traits", + "num_enum", + "paste", + "pretty", + "serde", + "serde_bytes", + "sha2", + "thiserror", +] + +[[package]] +name = "candid-fuzz" +version = "0.0.0" +dependencies = [ + "candid", + "libfuzzer-sys", + "serde", + "serde_bytes", +] + +[[package]] +name = "candid_derive" +version = "0.6.0" +dependencies = [ + "lazy_static", + "proc-macro2", + "quote", + "syn", +] + 
+[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +dependencies = [ + "jobserver", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + +[[package]] +name = "cpufeatures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "data-encoding" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb" + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "digest" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "either" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + +[[package]] +name = "ena" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7402b94a93c24e742487327a7cd839dc9d36fec9de9fb25b09f2dae459f36c3" +dependencies = [ + "log", +] + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "generic-array" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "indexmap" +version = "1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "jobserver" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b" +dependencies = [ + "libc", +] + +[[package]] +name = "lalrpop" +version = "0.19.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b30455341b0e18f276fa64540aff54deafb54c589de6aca68659c63dd2d5d823" +dependencies = [ + "ascii-canvas", + "atty", + "bit-set", + "diff", + "ena", + "itertools", + "lalrpop-util", + "petgraph", + "pico-args", + "regex", + "regex-syntax", + "string_cache", + "term", + "tiny-keccak", + "unicode-xid", +] + +[[package]] +name = "lalrpop-util" 
+version = "0.19.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf796c978e9b4d983414f4caedc9273aa33ee214c5b887bd55fde84c85d2dc4" +dependencies = [ + "regex", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "leb128" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" + +[[package]] +name = "libc" +version = "0.2.139" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "beb09950ae85a0a94b27676cccf37da5ff13f27076aa1adbc6545dd0d0e1bd4e" +dependencies = [ + "arbitrary", + "cc", + "once_cell", +] + +[[package]] +name = "lock_api" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "logos" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf8b031682c67a8e3d5446840f9573eb7fe26efe7ec8d195c9ac4c0647c502f1" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-derive" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d849148dbaf9661a6151d1ca82b13bb4c4c128146a88d05253b38d4e2f496c" +dependencies = [ + "beef", + 
"fnv", + "proc-macro2", + "quote", + "regex-syntax", + "syn", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + +[[package]] +name = "nom8" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae01545c9c7fc4486ab7debaf2aad7003ac19431791868fb2e8066df97fad2f8" +dependencies = [ + "memchr", +] + +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", + "serde", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_enum" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d829733185c1ca374f17e52b762f24f535ec625d2cc1f070e34c8a9068f341b" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2be1598bf1c313dcdd12092e3f1920f463462525a21b7b4e11b4168353d0123e" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + 
+[[package]] +name = "once_cell" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1ef8814b5c993410bb3adfad7a5ed269563e4a2f90c41f5d85be7fb47133bf" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + +[[package]] +name = "paste" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba" + +[[package]] +name = "petgraph" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pico-args" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468" + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "pretty" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ad9940b913ee56ddd94aec2d3cd179dd47068236f42a1a6415ccf9d880ce2a61" +dependencies = [ + "arrayvec", + "typed-arena", +] + +[[package]] +name = "proc-macro-crate" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66618389e4ec1c7afe67d51a9bf34ff9236480f8d51e7489b7d5ab0303c13f34" +dependencies = [ + "once_cell", + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom", + "redox_syscall", + "thiserror", +] + +[[package]] +name = "regex" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" + +[[package]] +name = "rustversion" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_bytes" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "718dc5fff5b36f99093fc49b280cfc96ce6fc824317783bff5a1fed0c7a64819" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "sha2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + +[[package]] +name = "smallvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" + +[[package]] +name = "string_cache" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213494b7a2b503146286049378ce02b482200519accc31872ee8be91fa820a08" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared", + "precomputed-hash", +] + +[[package]] +name = 
"syn" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + +[[package]] +name = "termcolor" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "toml_datetime" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4553f467ac8e3d374bc9a177a26801e5d0f9b211aa1673fb137a403afd1c9cf5" + +[[package]] +name = "toml_edit" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c59d8dd7d0dcbc6428bf7aa2f0e823e26e43b3c9aca15bbc9475d23e5fa12b" +dependencies = [ + "indexmap", + "nom8", + "toml_datetime", +] + +[[package]] +name = "typed-arena" +version = "2.0.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" + 
+[[package]] +name = "windows_x86_64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" diff --git a/rust/candid_parser/fuzz/Cargo.toml b/rust/candid_parser/fuzz/Cargo.toml new file mode 100644 index 00000000..2c011dab --- /dev/null +++ b/rust/candid_parser/fuzz/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "candid-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +serde_bytes = "0.11" +serde = "1.0.115" + +[dependencies.candid] +path = ".." +features = ["parser"] + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "parser" +path = "fuzz_targets/parser.rs" +test = false +doc = false + +[[bin]] +name = "type_decoder" +path = "fuzz_targets/type_decoder.rs" +test = false +doc = false \ No newline at end of file diff --git a/rust/candid_parser/fuzz/fuzz_targets/parser.rs b/rust/candid_parser/fuzz/fuzz_targets/parser.rs new file mode 100644 index 00000000..c652714f --- /dev/null +++ b/rust/candid_parser/fuzz/fuzz_targets/parser.rs @@ -0,0 +1,13 @@ +#![no_main] +use candid::types::value::IDLArgs; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + let decoded = match IDLArgs::from_bytes(&data) { + Ok(_v) => _v, + Err(_e) => return, + }; + let _ = decoded.get_types(); + let _ = decoded.to_bytes(); + let _ = decoded.to_string(); +}); diff --git a/rust/candid_parser/fuzz/fuzz_targets/type_decoder.rs b/rust/candid_parser/fuzz/fuzz_targets/type_decoder.rs new file mode 100644 index 00000000..b6e4a609 --- /dev/null +++ b/rust/candid_parser/fuzz/fuzz_targets/type_decoder.rs @@ -0,0 +1,44 @@ +#![no_main] +use candid::{define_function, CandidType, Decode, Deserialize, Nat}; +use libfuzzer_sys::fuzz_target; +use serde_bytes::ByteBuf; + +#[derive(CandidType, Deserialize)] 
+pub struct Token { + pub key: String, + pub content_encoding: String, + pub index: Nat, + pub sha256: Option, +} + +define_function!(pub callback : (&u8) -> (Nat)); +#[derive(CandidType, Deserialize)] +pub struct CallbackStrategy { + pub callback: callback, + pub token: Token, +} + +#[derive(CandidType, Clone, Deserialize)] +pub struct HeaderField(pub String, pub String); + +#[derive(CandidType, Deserialize)] +pub enum StreamingStrategy { + Callback(CallbackStrategy), +} + +#[derive(CandidType, Deserialize)] +pub struct HttpResponse { + pub status_code: u16, + pub headers: Vec, + #[serde(with = "serde_bytes")] + pub body: Vec, + pub streaming_strategy: Option, +} + +fuzz_target!(|data: &[u8]| { + let payload = data.to_vec(); + let _decoded = match Decode!(payload.as_slice(), HttpResponse) { + Ok(_v) => _v, + Err(_e) => return, + }; +}); diff --git a/rust/candid_parser/src/binary_parser.rs b/rust/candid_parser/src/binary_parser.rs new file mode 100644 index 00000000..74875313 --- /dev/null +++ b/rust/candid_parser/src/binary_parser.rs @@ -0,0 +1,274 @@ +use crate::types::internal::{Field, Function, Label, Type, TypeInner}; +use crate::types::{FuncMode, TypeEnv}; +use anyhow::{anyhow, Context, Result}; +use binread::io::{Read, Seek}; +use binread::{BinRead, BinResult, Error as BError, ReadOptions}; +use std::convert::TryInto; + +fn read_leb(reader: &mut R, ro: &ReadOptions, _: ()) -> BinResult { + let pos = reader.stream_position()?; + leb128::read::unsigned(reader).map_err(|_| BError::Custom { + pos, + err: Box::new(ro.variable_name.unwrap_or("Invalid leb128")), + }) +} +fn read_sleb(reader: &mut R, ro: &ReadOptions, _: ()) -> BinResult { + let pos = reader.stream_position()?; + leb128::read::signed(reader).map_err(|_| BError::Custom { + pos, + err: Box::new(ro.variable_name.unwrap_or("Invalid sleb128")), + }) +} + +#[derive(BinRead, Debug)] +#[br(magic = b"DIDL")] +pub struct Header { + table: Table, + #[br(parse_with = read_leb)] + len: u64, + #[br(count = len)] 
+ args: Vec, +} +#[derive(BinRead, Debug)] +struct Table { + #[br(parse_with = read_leb, assert(len <= i64::MAX as u64, "type table size out of range"))] + len: u64, + #[br(count = len)] + table: Vec, +} +#[derive(BinRead, Debug)] +enum ConsType { + #[br(magic = 0x6eu8)] + Opt(Box), + #[br(magic = 0x6du8)] + Vec(Box), + #[br(magic = 0x6cu8)] + Record(Fields), + #[br(magic = 0x6bu8)] + Variant(Fields), + #[br(magic = 0x6au8)] + Func(FuncType), + #[br(magic = 0x69u8)] + Service(ServType), + Future(FutureType), +} +#[derive(BinRead, Debug)] +struct IndexType { + #[br(parse_with = read_sleb, assert(index >= -17 || index == -24, "unknown opcode {}", index))] + index: i64, +} +#[derive(BinRead, Debug)] +struct Fields { + #[br(parse_with = read_leb, try_map = |x:u64| x.try_into().map_err(|_| "field length out of 32-bit range"))] + len: u32, + #[br(count = len)] + inner: Vec, +} +#[derive(BinRead, Debug)] +struct FieldType { + #[br(parse_with = read_leb, try_map = |x:u64| x.try_into().map_err(|_| "field id out of 32-bit range"))] + id: u32, + index: IndexType, +} +#[derive(BinRead, Debug)] +struct FuncType { + #[br(parse_with = read_leb)] + arg_len: u64, + #[br(count = arg_len)] + args: Vec, + #[br(parse_with = read_leb)] + ret_len: u64, + #[br(count = ret_len)] + rets: Vec, + #[br(assert(ann_len <= 1u8, "function annotation length should be at most 1"))] + ann_len: u8, + #[br(count = ann_len)] + ann: Vec, +} +#[derive(BinRead, Debug)] +struct ServType { + #[br(parse_with = read_leb)] + len: u64, + #[br(count = len)] + meths: Vec, +} +#[derive(BinRead, Debug)] +struct FutureType { + #[br(parse_with = read_sleb, assert(opcode < -24, "{} is not a valid future type", opcode))] + opcode: i64, + #[br(parse_with = read_leb)] + len: u64, + #[br(count = len)] + blob: Vec, +} +#[derive(BinRead, Debug)] +struct Meths { + #[br(parse_with = read_leb)] + len: u64, + #[br(count = len, try_map = |x:Vec| String::from_utf8(x).map_err(|_| "invalid utf8"))] + name: String, + ty: IndexType, 
+} +#[derive(BinRead, Debug)] +struct Mode { + #[br(try_map = |x:u8| match x { 1u8 => Ok(FuncMode::Query), | 2u8 => Ok(FuncMode::Oneway), | 3u8 => Ok(FuncMode::CompositeQuery), | _ => Err("Unknown annotation") })] + inner: FuncMode, +} + +#[derive(BinRead)] +pub struct BoolValue( + #[br(try_map = |x:u8| match x { 0u8 => Ok(false), | 1u8 => Ok(true), | _ => Err("Expect 00 or 01") } )] + pub bool, +); +#[derive(BinRead)] +pub struct Len( + #[br(parse_with = read_leb, try_map = |x:u64| x.try_into().map_err(|_| "length out of usize range"))] + pub usize, +); +#[derive(BinRead)] +pub struct PrincipalBytes { + #[br(assert(flag == 1u8, "Opaque reference not supported"))] + pub flag: u8, + #[br(parse_with = read_leb)] + pub len: u64, + #[br(count = len)] + pub inner: Vec, +} + +fn index_to_var(ind: i64) -> String { + format!("table{ind}") +} +impl IndexType { + fn to_type(&self, len: u64) -> Result { + Ok(match self.index { + v if v >= 0 => { + if v >= len as i64 { + return Err(anyhow!("type index {} out of range", v)); + } + TypeInner::Var(index_to_var(v)) + } + -1 => TypeInner::Null, + -2 => TypeInner::Bool, + -3 => TypeInner::Nat, + -4 => TypeInner::Int, + -5 => TypeInner::Nat8, + -6 => TypeInner::Nat16, + -7 => TypeInner::Nat32, + -8 => TypeInner::Nat64, + -9 => TypeInner::Int8, + -10 => TypeInner::Int16, + -11 => TypeInner::Int32, + -12 => TypeInner::Int64, + -13 => TypeInner::Float32, + -14 => TypeInner::Float64, + -15 => TypeInner::Text, + -16 => TypeInner::Reserved, + -17 => TypeInner::Empty, + -24 => TypeInner::Principal, + _ => unreachable!(), + } + .into()) + } +} +impl ConsType { + fn to_type(&self, len: u64) -> Result { + Ok(match &self { + ConsType::Opt(ref ind) => TypeInner::Opt(ind.to_type(len)?), + ConsType::Vec(ref ind) => TypeInner::Vec(ind.to_type(len)?), + ConsType::Record(fs) | ConsType::Variant(fs) => { + let mut res = Vec::new(); + let mut prev = None; + for f in fs.inner.iter() { + if let Some(prev) = prev { + if prev >= f.id { + return 
Err(anyhow!("field id {} collision or not sorted", f.id)); + } + } + prev = Some(f.id); + let field = Field { + id: Label::Id(f.id).into(), + ty: f.index.to_type(len)?, + }; + res.push(field); + } + if matches!(&self, ConsType::Record(_)) { + TypeInner::Record(res) + } else { + TypeInner::Variant(res) + } + } + ConsType::Func(f) => { + let mut args = Vec::new(); + let mut rets = Vec::new(); + for arg in f.args.iter() { + args.push(arg.to_type(len)?); + } + for ret in f.rets.iter() { + rets.push(ret.to_type(len)?); + } + TypeInner::Func(Function { + modes: f.ann.iter().map(|x| x.inner.clone()).collect(), + args, + rets, + }) + } + ConsType::Service(serv) => { + let mut res = Vec::new(); + let mut prev = None; + for m in serv.meths.iter() { + if let Some(prev) = prev { + if prev >= &m.name { + return Err(anyhow!("method name {} duplicate or not sorted", m.name)); + } + } + prev = Some(&m.name); + res.push((m.name.clone(), m.ty.to_type(len)?)); + } + TypeInner::Service(res) + } + ConsType::Future(_) => TypeInner::Future, + } + .into()) + } +} +impl Table { + fn to_env(&self, len: u64) -> Result { + use std::collections::BTreeMap; + let mut env = BTreeMap::new(); + for (i, t) in self.table.iter().enumerate() { + let ty = t + .to_type(len) + .with_context(|| format!("Invalid table entry {i}: {t:?}"))?; + env.insert(index_to_var(i as i64), ty); + } + // validate method has func type + for (_, t) in env.iter() { + if let TypeInner::Service(ms) = t.as_ref() { + for (name, ty) in ms.iter() { + if let TypeInner::Var(id) = ty.as_ref() { + if matches!(env.get(id).map(|t| t.as_ref()), Some(TypeInner::Func(_))) { + continue; + } + } + return Err(anyhow!("Method {name} has a non-function type {ty}")); + } + } + } + Ok(TypeEnv(env)) + } +} +impl Header { + pub fn to_types(&self) -> Result<(TypeEnv, Vec)> { + let len = self.table.len; + let mut env = self.table.to_env(len)?; + env.replace_empty()?; + let mut args = Vec::new(); + for (i, t) in self.args.iter().enumerate() { + 
args.push( + t.to_type(len) + .with_context(|| format!("Invalid argument entry {i}: {t:?}"))?, + ); + } + Ok((env, args)) + } +} diff --git a/rust/candid/src/bindings/analysis.rs b/rust/candid_parser/src/bindings/analysis.rs similarity index 100% rename from rust/candid/src/bindings/analysis.rs rename to rust/candid_parser/src/bindings/analysis.rs diff --git a/rust/candid_parser/src/bindings/candid.rs b/rust/candid_parser/src/bindings/candid.rs new file mode 100644 index 00000000..ffe4d8d3 --- /dev/null +++ b/rust/candid_parser/src/bindings/candid.rs @@ -0,0 +1,474 @@ +use crate::pretty::*; +use crate::types::{Field, Function, Label, SharedLabel, Type, TypeEnv, TypeInner}; +use pretty::RcDoc; + +static KEYWORDS: [&str; 30] = [ + "import", + "service", + "func", + "type", + "opt", + "vec", + "record", + "variant", + "blob", + "principal", + "nat", + "nat8", + "nat16", + "nat32", + "nat64", + "int", + "int8", + "int16", + "int32", + "int64", + "float32", + "float64", + "bool", + "text", + "null", + "reserved", + "empty", + "oneway", + "query", + "composite_query", +]; + +fn is_keyword(id: &str) -> bool { + KEYWORDS.contains(&id) +} + +pub(crate) fn is_valid_as_id(id: &str) -> bool { + if id.is_empty() || !id.is_ascii() { + return false; + } + for (i, c) in id.char_indices() { + if i == 0 { + if !c.is_ascii_alphabetic() && c != '_' { + return false; + } + } else if !c.is_ascii_alphanumeric() && c != '_' { + return false; + } + } + true +} + +fn needs_quote(id: &str) -> bool { + !is_valid_as_id(id) || is_keyword(id) +} + +pub(crate) fn ident_string(id: &str) -> String { + if needs_quote(id) { + format!("\"{}\"", id.escape_debug()) + } else { + id.to_string() + } +} + +pub(crate) fn pp_text(id: &str) -> RcDoc { + RcDoc::text(ident_string(id)) +} + +pub fn pp_ty(ty: &Type) -> RcDoc { + pp_ty_inner(ty.as_ref()) +} + +pub fn pp_ty_inner(ty: &TypeInner) -> RcDoc { + use TypeInner::*; + match ty { + Null => str("null"), + Bool => str("bool"), + Nat => str("nat"), + Int => 
str("int"), + Nat8 => str("nat8"), + Nat16 => str("nat16"), + Nat32 => str("nat32"), + Nat64 => str("nat64"), + Int8 => str("int8"), + Int16 => str("int16"), + Int32 => str("int32"), + Int64 => str("int64"), + Float32 => str("float32"), + Float64 => str("float64"), + Text => str("text"), + Reserved => str("reserved"), + Empty => str("empty"), + Var(ref s) => str(s), + Principal => str("principal"), + Opt(ref t) => kwd("opt").append(pp_ty(t)), + Vec(ref t) => kwd("vec").append(pp_ty(t)), + Record(ref fs) => { + let t = Type(ty.clone().into()); + if t.is_tuple() { + let tuple = concat(fs.iter().map(|f| pp_ty(&f.ty)), ";"); + kwd("record").append(enclose_space("{", tuple, "}")) + } else { + kwd("record").append(pp_fields(fs, false)) + } + } + Variant(ref fs) => kwd("variant").append(pp_fields(fs, true)), + Func(ref func) => kwd("func").append(pp_function(func)), + Service(ref serv) => kwd("service").append(pp_service(serv)), + Class(ref args, ref t) => { + let doc = pp_args(args).append(" ->").append(RcDoc::space()); + match t.as_ref() { + Service(ref serv) => doc.append(pp_service(serv)), + Var(ref s) => doc.append(s), + _ => unreachable!(), + } + } + Knot(ref id) => RcDoc::text(format!("{id}")), + Unknown => str("unknown"), + Future => str("future"), + } +} + +pub fn pp_label(id: &SharedLabel) -> RcDoc { + match &**id { + Label::Named(id) => pp_text(id), + Label::Id(_) | Label::Unnamed(_) => RcDoc::as_string(id), + } +} + +pub(crate) fn pp_field(field: &Field, is_variant: bool) -> RcDoc { + let ty_doc = if is_variant && *field.ty == TypeInner::Null { + RcDoc::nil() + } else { + kwd(" :").append(pp_ty(&field.ty)) + }; + pp_label(&field.id).append(ty_doc) +} + +fn pp_fields(fs: &[Field], is_variant: bool) -> RcDoc { + let fields = concat(fs.iter().map(|f| pp_field(f, is_variant)), ";"); + enclose_space("{", fields, "}") +} + +pub fn pp_function(func: &Function) -> RcDoc { + let args = pp_args(&func.args); + let rets = pp_args(&func.rets); + let modes = 
pp_modes(&func.modes); + args.append(" ->") + .append(RcDoc::space()) + .append(rets.append(modes)) + .nest(INDENT_SPACE) +} + +pub fn pp_args(args: &[Type]) -> RcDoc { + let doc = concat(args.iter().map(pp_ty), ","); + enclose("(", doc, ")") +} + +pub(crate) fn pp_modes(modes: &[crate::types::FuncMode]) -> RcDoc { + RcDoc::concat(modes.iter().map(|m| RcDoc::space().append(m.to_doc()))) +} + +fn pp_service(serv: &[(String, Type)]) -> RcDoc { + let doc = concat( + serv.iter().map(|(id, func)| { + let func_doc = match func.as_ref() { + TypeInner::Func(ref f) => pp_function(f), + TypeInner::Var(_) => pp_ty(func), + _ => unreachable!(), + }; + pp_text(id).append(kwd(" :")).append(func_doc) + }), + ";", + ); + enclose_space("{", doc, "}") +} + +fn pp_defs(env: &TypeEnv) -> RcDoc { + lines(env.0.iter().map(|(id, ty)| { + kwd("type") + .append(ident(id)) + .append(kwd("=")) + .append(pp_ty(ty)) + .append(";") + })) +} + +fn pp_actor(ty: &Type) -> RcDoc { + match ty.as_ref() { + TypeInner::Service(ref serv) => pp_service(serv), + TypeInner::Var(_) | TypeInner::Class(_, _) => pp_ty(ty), + _ => unreachable!(), + } +} + +pub fn compile(env: &TypeEnv, actor: &Option) -> String { + match actor { + None => pp_defs(env).pretty(LINE_WIDTH).to_string(), + Some(actor) => { + let defs = pp_defs(env); + let actor = kwd("service :").append(pp_actor(actor)); + let doc = defs.append(actor); + doc.pretty(LINE_WIDTH).to_string() + } + } +} + +pub mod value { + use crate::bindings::candid::{ident_string, pp_text}; + use crate::pretty::*; + use crate::types::value::{IDLArgs, IDLField, IDLValue}; + use crate::types::{number::pp_num_str, Label}; + use std::fmt; + + use ::pretty::RcDoc; + + const MAX_ELEMENTS_FOR_PRETTY_PRINT: usize = 10; + + impl fmt::Display for IDLArgs { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", pp_args(self).pretty(80)) + } + } + + impl fmt::Display for IDLValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + 
f, + "{}", + pp_value(MAX_ELEMENTS_FOR_PRETTY_PRINT, self).pretty(80) + ) + } + } + + impl fmt::Debug for IDLArgs { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.args.len() == 1 { + write!(f, "({:?})", self.args[0]) + } else { + let mut tup = f.debug_tuple(""); + for arg in self.args.iter() { + tup.field(arg); + } + tup.finish() + } + } + } + fn has_type_annotation(v: &IDLValue) -> bool { + use IDLValue::*; + matches!( + v, + Int(_) + | Nat(_) + | Nat8(_) + | Nat16(_) + | Nat32(_) + | Nat64(_) + | Int8(_) + | Int16(_) + | Int32(_) + | Int64(_) + | Float32(_) + | Float64(_) + | Null + | Reserved + ) + } + pub fn number_to_string(v: &IDLValue) -> String { + use IDLValue::*; + match v { + Number(n) => n.to_string(), + Int(n) => n.to_string(), + Nat(n) => n.to_string(), + Nat8(n) => n.to_string(), + Nat16(n) => pp_num_str(&n.to_string()), + Nat32(n) => pp_num_str(&n.to_string()), + Nat64(n) => pp_num_str(&n.to_string()), + Int8(n) => n.to_string(), + Int16(n) => pp_num_str(&n.to_string()), + Int32(n) => pp_num_str(&n.to_string()), + Int64(n) => pp_num_str(&n.to_string()), + Float32(f) => { + if f.trunc() == *f { + format!("{f}.0") + } else { + f.to_string() + } + } + Float64(f) => { + if f.trunc() == *f { + format!("{f}.0") + } else { + f.to_string() + } + } + _ => unreachable!(), + } + } + impl fmt::Debug for IDLValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use IDLValue::*; + match self { + Null => write!(f, "null : null"), + Bool(b) => write!(f, "{b}"), + Number(n) => write!(f, "{n}"), + Int(i) => write!(f, "{i} : int"), + Nat(n) => write!(f, "{n} : nat"), + Nat8(n) => write!(f, "{n} : nat8"), + Nat16(n) => write!(f, "{} : nat16", pp_num_str(&n.to_string())), + Nat32(n) => write!(f, "{} : nat32", pp_num_str(&n.to_string())), + Nat64(n) => write!(f, "{} : nat64", pp_num_str(&n.to_string())), + Int8(n) => write!(f, "{n} : int8"), + Int16(n) => write!(f, "{} : int16", pp_num_str(&n.to_string())), + Int32(n) => write!(f, 
"{} : int32", pp_num_str(&n.to_string())), + Int64(n) => write!(f, "{} : int64", pp_num_str(&n.to_string())), + Float32(_) => write!(f, "{} : float32", number_to_string(self)), + Float64(_) => write!(f, "{} : float64", number_to_string(self)), + Text(s) => write!(f, "{s:?}"), + None => write!(f, "null"), + Reserved => write!(f, "null : reserved"), + Principal(id) => write!(f, "principal \"{id}\""), + Service(id) => write!(f, "service \"{id}\""), + Func(id, meth) => write!( + f, + "func \"{}\".{}", + id, + crate::bindings::candid::ident_string(meth) + ), + Opt(v) if has_type_annotation(v) => write!(f, "opt ({v:?})"), + Opt(v) => write!(f, "opt {v:?}"), + Vec(vs) => { + if let Some(Nat8(_)) = vs.first() { + write!(f, "blob \"")?; + for v in vs.iter() { + match v { + Nat8(v) => write!(f, "{}", &pp_char(*v))?, + _ => unreachable!(), + } + } + write!(f, "\"") + } else { + write!(f, "vec {{")?; + for v in vs.iter() { + write!(f, " {v:?};")? + } + write!(f, "}}") + } + } + Record(fs) => { + write!(f, "record {{")?; + for (i, e) in fs.iter().enumerate() { + if e.id.get_id() == i as u32 { + write!(f, " {:?};", e.val)?; + } else { + write!(f, " {e:?};")?; + } + } + write!(f, "}}") + } + Variant(v) => { + write!(f, "variant {{ ")?; + if v.0.val == Null { + write!(f, "{}", v.0.id)?; + } else { + write!(f, "{:?}", v.0)?; + } + write!(f, " }}") + } + } + } + } + impl fmt::Debug for IDLField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let lab = match &self.id { + Label::Named(id) => ident_string(id), + id => id.to_string(), + }; + write!(f, "{} = {:?}", lab, self.val) + } + } + + // The definition of tuple is language specific. 
+ fn is_tuple(t: &IDLValue) -> bool { + match t { + IDLValue::Record(ref fs) => { + for (i, field) in fs.iter().enumerate() { + if field.id.get_id() != (i as u32) { + return false; + } + } + true + } + _ => false, + } + } + + fn pp_label(id: &Label) -> RcDoc { + match id { + Label::Named(id) => pp_text(id), + Label::Id(_) | Label::Unnamed(_) => RcDoc::as_string(id), + } + } + + fn pp_field(depth: usize, field: &IDLField, is_variant: bool) -> RcDoc { + let val_doc = if is_variant && field.val == IDLValue::Null { + RcDoc::nil() + } else { + kwd(" =").append(pp_value(depth - 1, &field.val)) + }; + pp_label(&field.id).append(val_doc) + } + + fn pp_fields(depth: usize, fields: &[IDLField]) -> RcDoc { + let fs = concat(fields.iter().map(|f| pp_field(depth, f, false)), ";"); + enclose_space("{", fs, "}") + } + + pub fn pp_char(v: u8) -> String { + if (0x20..=0x7e).contains(&v) && v != 0x22 && v != 0x27 && v != 0x60 && v != 0x5c { + std::char::from_u32(v as u32).unwrap().to_string() + } else { + format!("\\{v:02x}") + } + } + + pub fn pp_value(depth: usize, v: &IDLValue) -> RcDoc { + use IDLValue::*; + if depth == 0 { + return RcDoc::as_string(format!("{v:?}")); + } + match v { + Text(ref s) => RcDoc::as_string(format!("\"{}\"", s.escape_debug())), + Opt(v) if has_type_annotation(v) => { + kwd("opt").append(enclose("(", pp_value(depth - 1, v), ")")) + } + Opt(v) => kwd("opt").append(pp_value(depth - 1, v)), + Vec(vs) => { + if matches!(vs.first(), Some(Nat8(_))) || vs.len() > MAX_ELEMENTS_FOR_PRETTY_PRINT { + RcDoc::as_string(format!("{v:?}")) + } else { + let body = concat(vs.iter().map(|v| pp_value(depth - 1, v)), ";"); + kwd("vec").append(enclose_space("{", body, "}")) + } + } + Record(fields) => { + if is_tuple(v) { + let tuple = concat(fields.iter().map(|f| pp_value(depth - 1, &f.val)), ";"); + kwd("record").append(enclose_space("{", tuple, "}")) + } else { + kwd("record").append(pp_fields(depth, fields)) + } + } + Variant(v) => { + 
kwd("variant").append(enclose_space("{", pp_field(depth, &v.0, true), "}")) + } + _ => RcDoc::as_string(format!("{v:?}")), + } + } + + pub fn pp_args(args: &IDLArgs) -> RcDoc { + let body = concat( + args.args + .iter() + .map(|v| pp_value(MAX_ELEMENTS_FOR_PRETTY_PRINT, v)), + ",", + ); + enclose("(", body, ")") + } +} diff --git a/rust/candid/src/bindings/javascript.rs b/rust/candid_parser/src/bindings/javascript.rs similarity index 100% rename from rust/candid/src/bindings/javascript.rs rename to rust/candid_parser/src/bindings/javascript.rs diff --git a/rust/candid_parser/src/bindings/mod.rs b/rust/candid_parser/src/bindings/mod.rs new file mode 100644 index 00000000..89d86ab3 --- /dev/null +++ b/rust/candid_parser/src/bindings/mod.rs @@ -0,0 +1,20 @@ +//! Candid bindings for different languages. +// This module assumes the input are type checked, it is safe to use unwrap. + +pub mod candid; + +#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] +#[cfg(feature = "parser")] +pub mod analysis; +#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] +#[cfg(feature = "parser")] +pub mod javascript; +#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] +#[cfg(feature = "parser")] +pub mod motoko; +#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] +#[cfg(feature = "parser")] +pub mod rust; +#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] +#[cfg(feature = "parser")] +pub mod typescript; diff --git a/rust/candid/src/bindings/motoko.rs b/rust/candid_parser/src/bindings/motoko.rs similarity index 100% rename from rust/candid/src/bindings/motoko.rs rename to rust/candid_parser/src/bindings/motoko.rs diff --git a/rust/candid/src/bindings/rust.rs b/rust/candid_parser/src/bindings/rust.rs similarity index 100% rename from rust/candid/src/bindings/rust.rs rename to rust/candid_parser/src/bindings/rust.rs diff --git a/rust/candid/src/bindings/typescript.rs b/rust/candid_parser/src/bindings/typescript.rs similarity index 100% rename from rust/candid/src/bindings/typescript.rs rename to 
rust/candid_parser/src/bindings/typescript.rs diff --git a/rust/candid_parser/src/de.rs b/rust/candid_parser/src/de.rs new file mode 100644 index 00000000..4d40df92 --- /dev/null +++ b/rust/candid_parser/src/de.rs @@ -0,0 +1,1103 @@ +//! Deserialize Candid binary format to Rust data structures + +use super::{ + error::{Error, Result}, + types::internal::{type_of, TypeId}, + types::{Field, Label, SharedLabel, Type, TypeEnv, TypeInner}, + CandidType, Int, Nat, +}; +use crate::{ + binary_parser::{BoolValue, Header, Len, PrincipalBytes}, + types::subtype::{subtype, Gamma}, +}; +use anyhow::{anyhow, Context}; +use binread::BinRead; +use byteorder::{LittleEndian, ReadBytesExt}; +use serde::de::{self, Visitor}; +use std::fmt::Write; +use std::{collections::VecDeque, io::Cursor, mem::replace}; + +/// Use this struct to deserialize a sequence of Rust values (heterogeneous) from IDL binary message. +pub struct IDLDeserialize<'de> { + de: Deserializer<'de>, +} +impl<'de> IDLDeserialize<'de> { + /// Create a new deserializer with IDL binary message. + pub fn new(bytes: &'de [u8]) -> Result { + let de = Deserializer::from_bytes(bytes) + .with_context(|| format!("Cannot parse header {}", &hex::encode(bytes)))?; + Ok(IDLDeserialize { de }) + } + /// Create a new deserializer with IDL binary message. The config is used to adjust some parameters in the deserializer. + pub fn new_with_config(bytes: &'de [u8], config: Config) -> Result { + let mut de = if config.minize_error_message { + Deserializer::from_bytes(bytes)? + } else { + Deserializer::from_bytes(bytes) + .with_context(|| format!("Cannot parse header {}", &hex::encode(bytes)))? + }; + de.zero_sized_values = config.zero_sized_values; + de.minize_error_message = config.minize_error_message; + Ok(IDLDeserialize { de }) + } + /// Deserialize one value from deserializer. 
+ pub fn get_value(&mut self) -> Result + where + T: de::Deserialize<'de> + CandidType, + { + self.de.is_untyped = false; + self.deserialize_with_type(T::ty()) + } + pub fn get_value_with_type( + &mut self, + env: &TypeEnv, + expected_type: &Type, + ) -> Result { + self.de.table.merge(env)?; + self.de.is_untyped = true; + self.deserialize_with_type(expected_type.clone()) + } + fn deserialize_with_type(&mut self, expected_type: Type) -> Result + where + T: de::Deserialize<'de> + CandidType, + { + let expected_type = self.de.table.trace_type(&expected_type)?; + if self.de.types.is_empty() { + if matches!( + expected_type.as_ref(), + TypeInner::Opt(_) | TypeInner::Reserved | TypeInner::Null + ) { + self.de.expect_type = expected_type; + self.de.wire_type = TypeInner::Reserved.into(); + return T::deserialize(&mut self.de); + } else if self.de.minize_error_message { + return Err(Error::msg("No more values on the wire")); + } else { + return Err(Error::msg(format!( + "No more values on the wire, the expected type {expected_type} is not opt, null, or reserved" + ))); + } + } + + let (ind, ty) = self.de.types.pop_front().unwrap(); + self.de.expect_type = if matches!(expected_type.as_ref(), TypeInner::Unknown) { + self.de.is_untyped = true; + ty.clone() + } else { + expected_type.clone() + }; + self.de.wire_type = ty.clone(); + + let v = if self.de.minize_error_message { + T::deserialize(&mut self.de)? + } else { + T::deserialize(&mut self.de) + .with_context(|| self.de.dump_state()) + .with_context(|| { + format!("Fail to decode argument {ind} from {ty} to {expected_type}") + })? + }; + Ok(v) + } + /// Check if we finish deserializing all values. + pub fn is_done(&self) -> bool { + self.de.types.is_empty() + } + /// Return error if there are unprocessed bytes in the input. 
+ pub fn done(mut self) -> Result<()> { + while !self.is_done() { + self.get_value::()?; + } + let ind = self.de.input.position() as usize; + let rest = &self.de.input.get_ref()[ind..]; + if !rest.is_empty() { + if self.de.minize_error_message { + return Err(Error::msg("Trailing value after finishing deserialization")); + } else { + return Err(anyhow!(self.de.dump_state())) + .context("Trailing value after finishing deserialization")?; + } + } + Ok(()) + } +} + +pub struct Config { + pub zero_sized_values: usize, + pub minize_error_message: bool, +} + +macro_rules! assert { + ( false ) => {{ + return Err(Error::msg(format!( + "Internal error at {}:{}. Please file a bug.", + file!(), + line!() + ))); + }}; + ( $pred:expr ) => {{ + if !$pred { + return Err(Error::msg(format!( + "Internal error at {}:{}. Please file a bug.", + file!(), + line!() + ))); + } + }}; +} + +macro_rules! check { + ( false ) => {{ + return Err(Error::Subtype(format!( + "Type mismatch at {}:{}", + file!(), + line!() + ))); + }}; + ($exp:expr, $msg:expr) => {{ + if !$exp { + return Err(Error::Subtype($msg.to_string())); + } + }}; +} +#[cfg(not(target_arch = "wasm32"))] +macro_rules! check_recursion { + ($this:ident $($body:tt)*) => { + $this.recursion_depth += 1; + match stacker::remaining_stack() { + Some(size) if size < 32768 => return Err(Error::msg(format!("Recursion limit exceeded at depth {}", $this.recursion_depth))), + None if $this.recursion_depth > 512 => return Err(Error::msg(format!("Recursion limit exceeded at depth {}. Cannot detect stack size, use a conservative bound", $this.recursion_depth))), + _ => (), + } + let __ret = { $this $($body)* }; + $this.recursion_depth -= 1; + __ret + }; +} +// No need to check recursion depth for wasm32, because canisters are running in a sandbox +#[cfg(target_arch = "wasm32")] +macro_rules! 
check_recursion { + ($this:ident $($body:tt)*) => { + $this $($body)* + }; +} + +#[derive(Clone)] +struct Deserializer<'de> { + input: Cursor<&'de [u8]>, + table: TypeEnv, + types: VecDeque<(usize, Type)>, + wire_type: Type, + expect_type: Type, + // Memo table for subtyping relation + gamma: Gamma, + // field_name tells deserialize_identifier which field name to process. + // This field should always be set by set_field_name function. + field_name: Option, + // Indicates whether to deserialize with IDLValue. + // It only affects the field id generation in enum type. + is_untyped: bool, + zero_sized_values: usize, + minize_error_message: bool, + #[cfg(not(target_arch = "wasm32"))] + recursion_depth: u16, +} + +impl<'de> Deserializer<'de> { + fn from_bytes(bytes: &'de [u8]) -> Result { + let mut reader = Cursor::new(bytes); + let header = Header::read(&mut reader)?; + let (env, types) = header.to_types()?; + Ok(Deserializer { + input: reader, + table: env, + types: types.into_iter().enumerate().collect(), + wire_type: TypeInner::Unknown.into(), + expect_type: TypeInner::Unknown.into(), + gamma: Gamma::default(), + field_name: None, + is_untyped: false, + zero_sized_values: 2_000_000, + minize_error_message: false, + #[cfg(not(target_arch = "wasm32"))] + recursion_depth: 0, + }) + } + fn dump_state(&self) -> String { + let hex = hex::encode(self.input.get_ref()); + let pos = self.input.position() as usize * 2; + let (before, after) = hex.split_at(pos); + let mut res = format!("input: {before}_{after}\n"); + if !self.table.0.is_empty() { + write!(&mut res, "table: {}", self.table).unwrap(); + } + write!( + &mut res, + "wire_type: {}, expect_type: {}", + self.wire_type, self.expect_type + ) + .unwrap(); + if let Some(field) = &self.field_name { + write!(&mut res, ", field_name: {field:?}").unwrap(); + } + res + } + fn borrow_bytes(&mut self, len: usize) -> Result<&'de [u8]> { + let pos = self.input.position() as usize; + let slice = self.input.get_ref(); + if len > 
slice.len() || pos + len > slice.len() { + return Err(Error::msg(format!("Cannot read {len} bytes"))); + } + let end = pos + len; + let res = &slice[pos..end]; + self.input.set_position(end as u64); + Ok(res) + } + fn check_subtype(&mut self) -> Result<()> { + let res = subtype( + &mut self.gamma, + &self.table, + &self.wire_type, + &self.expect_type, + ); + if res.is_err() { + if self.minize_error_message { + return Err(Error::subtype(format!("{}", self.wire_type))); + } else { + res.with_context(|| { + format!( + "{} is not a subtype of {}", + self.wire_type, self.expect_type, + ) + }) + .map_err(Error::subtype)?; + } + } + Ok(()) + } + fn unroll_type(&mut self) -> Result<()> { + if matches!( + self.expect_type.as_ref(), + TypeInner::Var(_) | TypeInner::Knot(_) + ) { + self.expect_type = self.table.trace_type(&self.expect_type)?; + } + if matches!( + self.wire_type.as_ref(), + TypeInner::Var(_) | TypeInner::Knot(_) + ) { + self.wire_type = self.table.trace_type(&self.wire_type)?; + } + Ok(()) + } + fn is_zero_sized_type(&self, t: &Type) -> bool { + match t.as_ref() { + TypeInner::Null | TypeInner::Reserved => true, + TypeInner::Record(fs) => fs.iter().all(|f| { + let t = self.table.trace_type(&f.ty).unwrap(); + // recursive records have been replaced with empty already, it's safe to call without memoization. + self.is_zero_sized_type(&t) + }), + _ => false, + } + } + // Should always call set_field_name to set the field_name. After deserialize_identifier + // processed the field_name, field_name will be reset to None. + fn set_field_name(&mut self, field: SharedLabel) { + if self.field_name.is_some() { + unreachable!(); + } + self.field_name = Some(field); + } + // Customize deserailization methods + // Several deserialize functions will call visit_byte_buf. 
+ // We reserve the first byte to be a tag to distinguish between different callers: + // int(0), nat(1), principal(2), reserved(3), service(4), function(5) + // This is necessary for deserializing IDLValue because + // it has only one visitor and we need a way to know who called the visitor. + fn deserialize_int<'a, V>(&'a mut self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.unroll_type()?; + assert!(*self.expect_type == TypeInner::Int); + let mut bytes = vec![0u8]; + let int = match self.wire_type.as_ref() { + TypeInner::Int => Int::decode(&mut self.input).map_err(Error::msg)?, + TypeInner::Nat => Int(Nat::decode(&mut self.input) + .map_err(Error::msg)? + .0 + .try_into() + .map_err(Error::msg)?), + t => return Err(Error::subtype(format!("{t} cannot be deserialized to int"))), + }; + bytes.extend_from_slice(&int.0.to_signed_bytes_le()); + visitor.visit_byte_buf(bytes) + } + fn deserialize_nat<'a, V>(&'a mut self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.unroll_type()?; + check!( + *self.expect_type == TypeInner::Nat && *self.wire_type == TypeInner::Nat, + "nat" + ); + let mut bytes = vec![1u8]; + let nat = Nat::decode(&mut self.input).map_err(Error::msg)?; + bytes.extend_from_slice(&nat.0.to_bytes_le()); + visitor.visit_byte_buf(bytes) + } + fn deserialize_principal<'a, V>(&'a mut self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.unroll_type()?; + check!( + *self.expect_type == TypeInner::Principal && *self.wire_type == TypeInner::Principal, + "principal" + ); + let mut bytes = vec![2u8]; + let id = PrincipalBytes::read(&mut self.input)?.inner; + bytes.extend_from_slice(&id); + visitor.visit_byte_buf(bytes) + } + fn deserialize_reserved<'a, V>(&'a mut self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let bytes = vec![3u8]; + visitor.visit_byte_buf(bytes) + } + fn deserialize_service<'a, V>(&'a mut self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.unroll_type()?; + 
self.check_subtype()?; + let mut bytes = vec![4u8]; + let id = PrincipalBytes::read(&mut self.input)?.inner; + bytes.extend_from_slice(&id); + visitor.visit_byte_buf(bytes) + } + fn deserialize_function<'a, V>(&'a mut self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.unroll_type()?; + self.check_subtype()?; + if !BoolValue::read(&mut self.input)?.0 { + return Err(Error::msg("Opaque reference not supported")); + } + let mut bytes = vec![5u8]; + let id = PrincipalBytes::read(&mut self.input)?.inner; + let len = Len::read(&mut self.input)?.0; + let meth = self.borrow_bytes(len)?; + // TODO find a better way + leb128::write::unsigned(&mut bytes, len as u64)?; + bytes.extend_from_slice(meth); + bytes.extend_from_slice(&id); + visitor.visit_byte_buf(bytes) + } + fn deserialize_empty<'a, V>(&'a mut self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::subtype("Cannot decode empty type")) + } + fn deserialize_future<'a, V>(&'a mut self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let len = Len::read(&mut self.input)?.0 as u64; + Len::read(&mut self.input)?; + let slice_len = self.input.get_ref().len() as u64; + let pos = self.input.position(); + if len > slice_len || pos + len > slice_len { + return Err(Error::msg(format!("Cannot read {len} bytes"))); + } + self.input.set_position(pos + len); + visitor.visit_unit() + } + fn recoverable_visit_some<'a, V>(&'a mut self, visitor: V) -> Result + where + V: Visitor<'de>, + { + use de::Deserializer; + let tid = type_of(&visitor); + if tid != TypeId::of::() // derive Copy + && tid != TypeId::of::() // derive Copy + // OptionVisitor doesn't derive Copy, but has only PhantomData. + // OptionVisitor is private and we cannot get TypeId of OptionVisitor, + // we also cannot downcast V to concrete type, because of 'de + // The only option left seems to be type_name, but it is not guaranteed to be stable, so there is risk here. 
+ && !tid.name.starts_with("serde::de::impls::OptionVisitor<") + { + panic!("Not a valid visitor: {tid:?}"); + } + // This is safe, because the visitor either impl Copy or is zero sized + let v = unsafe { std::ptr::read(&visitor) }; + let mut self_clone = self.clone(); + match v.visit_some(&mut self_clone) { + Ok(v) => { + *self = self_clone; + Ok(v) + } + Err(Error::Subtype(_)) => { + self.deserialize_ignored_any(serde::de::IgnoredAny)?; + visitor.visit_none() + } + Err(e) => Err(e), + } + } +} + +macro_rules! primitive_impl { + ($ty:ident, $type:expr, $($value:tt)*) => { + paste::item! { + fn [](self, visitor: V) -> Result + where V: Visitor<'de> { + self.unroll_type()?; + check!(*self.expect_type == $type && *self.wire_type == $type, stringify!($type)); + let val = self.input.$($value)*().map_err(|_| Error::msg(format!("Cannot read {} value", stringify!($type))))?; + visitor.[](val) + } + } + }; +} + +impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { + type Error = Error; + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if self.field_name.is_some() { + return self.deserialize_identifier(visitor); + } + self.unroll_type()?; + match self.expect_type.as_ref() { + TypeInner::Int => self.deserialize_int(visitor), + TypeInner::Nat => self.deserialize_nat(visitor), + TypeInner::Nat8 => self.deserialize_u8(visitor), + TypeInner::Nat16 => self.deserialize_u16(visitor), + TypeInner::Nat32 => self.deserialize_u32(visitor), + TypeInner::Nat64 => self.deserialize_u64(visitor), + TypeInner::Int8 => self.deserialize_i8(visitor), + TypeInner::Int16 => self.deserialize_i16(visitor), + TypeInner::Int32 => self.deserialize_i32(visitor), + TypeInner::Int64 => self.deserialize_i64(visitor), + TypeInner::Float32 => self.deserialize_f32(visitor), + TypeInner::Float64 => self.deserialize_f64(visitor), + TypeInner::Bool => self.deserialize_bool(visitor), + TypeInner::Text => self.deserialize_string(visitor), + TypeInner::Null => 
self.deserialize_unit(visitor), + TypeInner::Reserved => { + if self.wire_type.as_ref() != &TypeInner::Reserved { + self.deserialize_ignored_any(serde::de::IgnoredAny)?; + } + self.deserialize_reserved(visitor) + } + TypeInner::Empty => self.deserialize_empty(visitor), + TypeInner::Principal => self.deserialize_principal(visitor), + // construct types + TypeInner::Opt(_) => self.deserialize_option(visitor), + TypeInner::Vec(_) => self.deserialize_seq(visitor), + TypeInner::Record(_) => self.deserialize_struct("_", &[], visitor), + TypeInner::Variant(_) => self.deserialize_enum("_", &[], visitor), + TypeInner::Service(_) => self.deserialize_service(visitor), + TypeInner::Func(_) => self.deserialize_function(visitor), + TypeInner::Future => self.deserialize_future(visitor), + _ => assert!(false), + } + } + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let is_untyped = replace(&mut self.is_untyped, true); + self.expect_type = self.wire_type.clone(); + let v = self.deserialize_any(visitor); + self.is_untyped = is_untyped; + v + } + + primitive_impl!(i8, TypeInner::Int8, read_i8); + primitive_impl!(i16, TypeInner::Int16, read_i16::); + primitive_impl!(i32, TypeInner::Int32, read_i32::); + primitive_impl!(i64, TypeInner::Int64, read_i64::); + primitive_impl!(u8, TypeInner::Nat8, read_u8); + primitive_impl!(u16, TypeInner::Nat16, read_u16::); + primitive_impl!(u32, TypeInner::Nat32, read_u32::); + primitive_impl!(u64, TypeInner::Nat64, read_u64::); + primitive_impl!(f32, TypeInner::Float32, read_f32::); + primitive_impl!(f64, TypeInner::Float64, read_f64::); + + fn is_human_readable(&self) -> bool { + false + } + fn deserialize_i128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + use num_traits::ToPrimitive; + self.unroll_type()?; + assert!(*self.expect_type == TypeInner::Int); + let value: i128 = match self.wire_type.as_ref() { + TypeInner::Int => { + let int = Int::decode(&mut self.input).map_err(Error::msg)?; + 
int.0 + .to_i128() + .ok_or_else(|| Error::msg("Cannot convert int to i128"))? + } + TypeInner::Nat => { + let nat = Nat::decode(&mut self.input).map_err(Error::msg)?; + nat.0 + .to_i128() + .ok_or_else(|| Error::msg("Cannot convert nat to i128"))? + } + t => return Err(Error::subtype(format!("{t} cannot be deserialized to int"))), + }; + visitor.visit_i128(value) + } + fn deserialize_u128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + use num_traits::ToPrimitive; + self.unroll_type()?; + check!( + *self.expect_type == TypeInner::Nat && *self.wire_type == TypeInner::Nat, + "nat" + ); + let nat = Nat::decode(&mut self.input).map_err(Error::msg)?; + let value = nat + .0 + .to_u128() + .ok_or_else(|| Error::msg("Cannot convert nat to u128"))?; + visitor.visit_u128(value) + } + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.unroll_type()?; + check!( + *self.expect_type == TypeInner::Null + && matches!(*self.wire_type, TypeInner::Null | TypeInner::Reserved), + "unit" + ); + visitor.visit_unit() + } + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.unroll_type()?; + check!( + *self.expect_type == TypeInner::Bool && *self.wire_type == TypeInner::Bool, + "bool" + ); + let res = BoolValue::read(&mut self.input)?; + visitor.visit_bool(res.0) + } + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.unroll_type()?; + check!( + *self.expect_type == TypeInner::Text && *self.wire_type == TypeInner::Text, + "text" + ); + let len = Len::read(&mut self.input)?.0; + let bytes = self.borrow_bytes(len)?.to_owned(); + let value = String::from_utf8(bytes).map_err(Error::msg)?; + visitor.visit_string(value) + } + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.unroll_type()?; + check!( + *self.expect_type == TypeInner::Text && *self.wire_type == TypeInner::Text, + "text" + ); + let len = Len::read(&mut self.input)?.0; + let 
slice = self.borrow_bytes(len)?; + let value: &str = std::str::from_utf8(slice).map_err(Error::msg)?; + visitor.visit_borrowed_str(value) + } + fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } + fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.unroll_type()?; + match (self.wire_type.as_ref(), self.expect_type.as_ref()) { + (TypeInner::Null, TypeInner::Opt(_)) | (TypeInner::Reserved, TypeInner::Opt(_)) => { + visitor.visit_none() + } + (TypeInner::Opt(t1), TypeInner::Opt(t2)) => { + self.wire_type = t1.clone(); + self.expect_type = t2.clone(); + if BoolValue::read(&mut self.input)?.0 { + check_recursion! { + self.recoverable_visit_some(visitor) + } + } else { + visitor.visit_none() + } + } + (_, TypeInner::Opt(t2)) => { + self.expect_type = self.table.trace_type(t2)?; + if !matches!( + self.expect_type.as_ref(), + TypeInner::Null | TypeInner::Reserved | TypeInner::Opt(_) + ) { + check_recursion! { + self.recoverable_visit_some(visitor) + } + } else { + self.deserialize_ignored_any(serde::de::IgnoredAny)?; + visitor.visit_none() + } + } + (_, _) => check!(false), + } + } + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + check_recursion! 
{ + self.unroll_type()?; + match (self.expect_type.as_ref(), self.wire_type.as_ref()) { + (TypeInner::Vec(e), TypeInner::Vec(w)) => { + let expect = e.clone(); + let wire = self.table.trace_type(w)?; + let len = Len::read(&mut self.input)?.0; + if self.is_zero_sized_type(&wire) { + if self.zero_sized_values < len { + return Err(Error::msg("vec length of zero sized values too large")); + } + self.zero_sized_values -= len; + } + visitor.visit_seq(Compound::new(self, Style::Vector { len, expect, wire })) + } + (TypeInner::Record(e), TypeInner::Record(w)) => { + let expect = e.clone().into(); + let wire = w.clone().into(); + check!(self.expect_type.is_tuple(), "seq_tuple"); + if !self.wire_type.is_tuple() { + return Err(Error::subtype(format!( + "{} is not a tuple type", + self.wire_type + ))); + } + let value = + visitor.visit_seq(Compound::new(self, Style::Struct { expect, wire }))?; + Ok(value) + } + _ => check!(false), + } + } + } + fn deserialize_byte_buf>(self, visitor: V) -> Result { + self.unroll_type()?; + check!( + *self.expect_type == TypeInner::Vec(TypeInner::Nat8.into()) + && *self.wire_type == TypeInner::Vec(TypeInner::Nat8.into()), + "vec nat8" + ); + let len = Len::read(&mut self.input)?.0; + let bytes = self.borrow_bytes(len)?.to_owned(); + visitor.visit_byte_buf(bytes) + } + fn deserialize_bytes>(self, visitor: V) -> Result { + self.unroll_type()?; + match self.expect_type.as_ref() { + TypeInner::Principal => self.deserialize_principal(visitor), + TypeInner::Vec(t) if **t == TypeInner::Nat8 => { + let len = Len::read(&mut self.input)?.0; + let slice = self.borrow_bytes(len)?; + visitor.visit_borrowed_bytes(slice) + } + _ => Err(Error::subtype("bytes only takes principal or vec nat8")), + } + } + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + check_recursion! 
{ + self.unroll_type()?; + match (self.expect_type.as_ref(), self.wire_type.as_ref()) { + (TypeInner::Vec(e), TypeInner::Vec(w)) => { + let e = self.table.trace_type(e)?; + let w = self.table.trace_type(w)?; + match (e.as_ref(), w.as_ref()) { + (TypeInner::Record(ref e), TypeInner::Record(ref w)) => { + match (&e[..], &w[..]) { + ( + [Field { id: e_id0, ty: ek }, Field { id: e_id1, ty: ev }], + [Field { id: w_id0, ty: wk }, Field { id: w_id1, ty: wv }], + ) if **e_id0 == Label::Id(0) + && **e_id1 == Label::Id(1) + && **w_id0 == Label::Id(0) + && **w_id1 == Label::Id(1) => + { + let expect = (ek.clone(), ev.clone()); + let wire = (wk.clone(), wv.clone()); + let len = Len::read(&mut self.input)?.0; + visitor.visit_map(Compound::new( + self, + Style::Map { len, expect, wire }, + )) + } + _ => Err(Error::subtype("expect a key-value pair")), + } + } + _ => Err(Error::subtype("expect a key-value pair")), + } + } + _ => check!(false), + } + } + } + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + check_recursion! { + self.deserialize_seq(visitor) + } + } + fn deserialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + check_recursion! { + self.deserialize_seq(visitor) + } + } + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + check_recursion! { + self.unroll_type()?; + match (self.expect_type.as_ref(), self.wire_type.as_ref()) { + (TypeInner::Record(e), TypeInner::Record(w)) => { + let expect = e.clone().into(); + let wire = w.clone().into(); + let value = + visitor.visit_map(Compound::new(self, Style::Struct { expect, wire }))?; + Ok(value) + } + _ => check!(false), + } + } + } + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + check_recursion! 
{ + self.unroll_type()?; + match (self.expect_type.as_ref(), self.wire_type.as_ref()) { + (TypeInner::Variant(e), TypeInner::Variant(w)) => { + let index = Len::read(&mut self.input)?.0; + let len = w.len(); + if index >= len { + return Err(Error::msg(format!( + "Variant index {index} larger than length {len}" + ))); + } + let wire = w[index].clone(); + let expect = match e.iter().find(|f| f.id == wire.id) { + Some(v) => v.clone(), + None => { + return Err(Error::subtype(format!("Unknown variant field {}", wire.id))); + } + }; + visitor.visit_enum(Compound::new(self, Style::Enum { expect, wire })) + } + _ => check!(false), + } + } + } + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self.field_name.take() { + Some(l) => match l.as_ref() { + Label::Named(name) => visitor.visit_string(name.to_string()), + Label::Id(hash) | Label::Unnamed(hash) => visitor.visit_u32(*hash), + }, + None => assert!(false), + } + } + + serde::forward_to_deserialize_any! 
{ + char + } +} + +#[derive(Debug)] +enum Style { + Vector { + len: usize, + expect: Type, + wire: Type, + }, + Struct { + expect: VecDeque, + wire: VecDeque, + }, + Enum { + expect: Field, + wire: Field, + }, + Map { + len: usize, + expect: (Type, Type), + wire: (Type, Type), + }, +} + +struct Compound<'a, 'de> { + de: &'a mut Deserializer<'de>, + style: Style, +} + +impl<'a, 'de> Compound<'a, 'de> { + fn new(de: &'a mut Deserializer<'de>, style: Style) -> Self { + Compound { de, style } + } +} + +impl<'de, 'a> de::SeqAccess<'de> for Compound<'a, 'de> { + type Error = Error; + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: de::DeserializeSeed<'de>, + { + match self.style { + Style::Vector { + ref mut len, + ref expect, + ref wire, + } => { + if *len == 0 { + return Ok(None); + } + *len -= 1; + self.de.expect_type = expect.clone(); + self.de.wire_type = wire.clone(); + seed.deserialize(&mut *self.de).map(Some) + } + Style::Struct { + ref mut expect, + ref mut wire, + } => { + if expect.is_empty() && wire.is_empty() { + return Ok(None); + } + self.de.expect_type = expect + .pop_front() + .map(|f| f.ty) + .unwrap_or_else(|| TypeInner::Reserved.into()); + self.de.wire_type = wire + .pop_front() + .map(|f| f.ty) + .unwrap_or_else(|| TypeInner::Reserved.into()); + seed.deserialize(&mut *self.de).map(Some) + } + _ => Err(Error::subtype("expect vector or tuple")), + } + } +} + +impl<'de, 'a> de::MapAccess<'de> for Compound<'a, 'de> { + type Error = Error; + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: de::DeserializeSeed<'de>, + { + match self.style { + Style::Struct { + ref mut expect, + ref mut wire, + } => { + match (expect.front(), wire.front()) { + (Some(e), Some(w)) => { + use std::cmp::Ordering; + match e.id.get_id().cmp(&w.id.get_id()) { + Ordering::Equal => { + self.de.set_field_name(e.id.clone()); + self.de.expect_type = expect.pop_front().unwrap().ty; + self.de.wire_type = wire.pop_front().unwrap().ty; + } + Ordering::Less => 
{ + // by subtyping rules, expect_type can only be opt, reserved or null. + let field = e.id.clone(); + self.de.set_field_name(field.clone()); + let expect = expect.pop_front().unwrap().ty; + self.de.expect_type = self.de.table.trace_type(&expect)?; + check!( + matches!( + self.de.expect_type.as_ref(), + TypeInner::Opt(_) | TypeInner::Reserved | TypeInner::Null + ), + format!("field {field} is not optional field") + ); + self.de.wire_type = TypeInner::Reserved.into(); + } + Ordering::Greater => { + self.de.set_field_name(Label::Named("_".to_owned()).into()); + self.de.wire_type = wire.pop_front().unwrap().ty; + self.de.expect_type = TypeInner::Reserved.into(); + } + } + } + (None, Some(_)) => { + self.de.set_field_name(Label::Named("_".to_owned()).into()); + self.de.wire_type = wire.pop_front().unwrap().ty; + self.de.expect_type = TypeInner::Reserved.into(); + } + (Some(e), None) => { + self.de.set_field_name(e.id.clone()); + self.de.expect_type = expect.pop_front().unwrap().ty; + self.de.wire_type = TypeInner::Reserved.into(); + } + (None, None) => return Ok(None), + } + seed.deserialize(&mut *self.de).map(Some) + } + Style::Map { + ref mut len, + ref expect, + ref wire, + } => { + // This only comes from deserialize_map + if *len == 0 { + return Ok(None); + } + self.de.expect_type = expect.0.clone(); + self.de.wire_type = wire.0.clone(); + *len -= 1; + seed.deserialize(&mut *self.de).map(Some) + } + _ => Err(Error::msg("expect struct or map")), + } + } + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + match &self.style { + Style::Map { expect, wire, .. 
} => { + self.de.expect_type = expect.1.clone(); + self.de.wire_type = wire.1.clone(); + seed.deserialize(&mut *self.de) + } + _ => seed.deserialize(&mut *self.de), + } + } +} + +impl<'de, 'a> de::EnumAccess<'de> for Compound<'a, 'de> { + type Error = Error; + type Variant = Self; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> + where + V: de::DeserializeSeed<'de>, + { + match &self.style { + Style::Enum { expect, wire } => { + self.de.expect_type = expect.ty.clone(); + self.de.wire_type = wire.ty.clone(); + let (mut label, label_type) = match expect.id.as_ref() { + Label::Named(name) => (name.clone(), "name"), + Label::Id(hash) | Label::Unnamed(hash) => (hash.to_string(), "id"), + }; + if self.de.is_untyped { + let accessor = match expect.ty.as_ref() { + TypeInner::Null => "unit", + TypeInner::Record(_) => "struct", + _ => "newtype", + }; + write!(&mut label, ",{label_type},{accessor}").map_err(Error::msg)?; + } + self.de.set_field_name(Label::Named(label).into()); + let field = seed.deserialize(&mut *self.de)?; + Ok((field, self)) + } + _ => Err(Error::subtype("expect enum")), + } + } +} + +impl<'de, 'a> de::VariantAccess<'de> for Compound<'a, 'de> { + type Error = Error; + + fn unit_variant(self) -> Result<()> { + check!( + *self.de.expect_type == TypeInner::Null && *self.de.wire_type == TypeInner::Null, + "unit_variant" + ); + Ok(()) + } + + fn newtype_variant_seed(self, seed: T) -> Result + where + T: de::DeserializeSeed<'de>, + { + seed.deserialize(self.de) + } + + fn tuple_variant(self, len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_tuple(self.de, len, visitor) + } + + fn struct_variant(self, fields: &'static [&'static str], visitor: V) -> Result + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_struct(self.de, "_", fields, visitor) + } +} diff --git a/rust/candid_parser/src/error.rs b/rust/candid_parser/src/error.rs new file mode 100644 index 00000000..9181d4a0 --- 
/dev/null +++ b/rust/candid_parser/src/error.rs @@ -0,0 +1,229 @@ +//! `candid::Result = Result>` + +use codespan_reporting::diagnostic::Label; +use serde::{de, ser}; +use std::io; +use thiserror::Error; + +#[cfg(feature = "parser")] +use crate::parser::token; +#[cfg(feature = "parser")] +use codespan_reporting::{ + diagnostic::Diagnostic, + files::{Error as ReportError, SimpleFile}, + term::{self, termcolor::StandardStream}, +}; + +pub type Result = std::result::Result; + +#[derive(Debug, Error)] +pub enum Error { + #[cfg_attr(docsrs, doc(cfg(feature = "parser")))] + #[cfg(feature = "parser")] + #[error("Candid parser error: {0}")] + Parse(#[from] token::ParserError), + + #[error("binary parser error: {}", .0.get(0).map(|f| format!("{} at byte offset {}", f.message, f.range.start/2)).unwrap_or_else(|| "io error".to_string()))] + Binread(Vec>), + + #[error("Subtyping error: {0}")] + Subtype(String), + + #[error(transparent)] + Custom(#[from] anyhow::Error), +} + +impl Error { + pub fn msg(msg: T) -> Self { + Error::Custom(anyhow::anyhow!(msg.to_string())) + } + pub fn subtype(msg: T) -> Self { + Error::Subtype(msg.to_string()) + } + #[cfg_attr(docsrs, doc(cfg(feature = "parser")))] + #[cfg(feature = "parser")] + pub fn report(&self) -> Diagnostic<()> { + match self { + Error::Parse(e) => { + use lalrpop_util::ParseError::*; + let mut diag = Diagnostic::error().with_message("parser error"); + let label = match e { + User { error } => { + Label::primary((), error.span.clone()).with_message(&error.err) + } + InvalidToken { location } => { + Label::primary((), *location..location + 1).with_message("Invalid token") + } + UnrecognizedEof { location, expected } => { + diag = diag.with_notes(report_expected(expected)); + Label::primary((), *location..location + 1).with_message("Unexpected EOF") + } + UnrecognizedToken { token, expected } => { + diag = diag.with_notes(report_expected(expected)); + Label::primary((), token.0..token.2).with_message("Unexpected token") + } + 
ExtraToken { token } => { + Label::primary((), token.0..token.2).with_message("Extra token") + } + }; + diag.with_labels(vec![label]) + } + Error::Binread(labels) => { + let diag = Diagnostic::error().with_message("decoding error"); + diag.with_labels(labels.to_vec()) + } + Error::Subtype(e) => Diagnostic::error().with_message(e), + Error::Custom(e) => Diagnostic::error().with_message(e.to_string()), + } + } +} + +fn get_binread_labels(e: &binread::Error) -> Vec> { + use binread::Error::*; + match e { + BadMagic { pos, .. } => { + let pos = (pos * 2) as usize; + vec![Label::primary((), pos..pos + 2).with_message("Unexpected bytes")] + } + Custom { pos, err } => { + let pos = (pos * 2) as usize; + let err = err + .downcast_ref::<&str>() + .unwrap_or(&"unknown error (there's a bug in error reporting)"); + vec![Label::primary((), pos..pos + 2).with_message(err.to_string())] + } + EnumErrors { + pos, + variant_errors, + } => { + let pos = (pos * 2) as usize; + let variant = variant_errors + .iter() + .find(|(_, e)| !matches!(e, BadMagic { .. 
})); + // Should have at most one non-magic error + match variant { + None => vec![Label::primary((), pos..pos + 2).with_message("Unknown opcode")], + Some((id, e)) => { + let mut labels = get_binread_labels(e); + labels.push(Label::secondary((), pos..pos + 2).with_message(id.to_string())); + labels + } + } + } + NoVariantMatch { pos } => { + let pos = (pos * 2) as usize; + vec![Label::primary((), pos..pos + 2).with_message("No variant match")] + } + AssertFail { pos, message } => { + let pos = (pos * 2) as usize; + vec![Label::primary((), pos..pos + 2).with_message(message)] + } + Io(_) => vec![], + _ => unreachable!(), + } +} + +#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] +#[cfg(feature = "parser")] +fn report_expected(expected: &[String]) -> Vec { + if expected.is_empty() { + return Vec::new(); + } + use pretty::RcDoc; + let doc: RcDoc<()> = RcDoc::intersperse( + expected.iter().map(RcDoc::text), + RcDoc::text(",").append(RcDoc::softline()), + ); + let header = if expected.len() == 1 { + "Expects" + } else { + "Expects one of" + }; + let doc = RcDoc::text(header).append(RcDoc::softline().append(doc)); + vec![doc.pretty(70).to_string()] +} + +impl ser::Error for Error { + fn custom(msg: T) -> Self { + Error::msg(format!("Serialize error: {msg}")) + } +} + +impl de::Error for Error { + fn custom(msg: T) -> Self { + let msg = msg.to_string(); + if let Some(msg) = msg.strip_prefix("Subtyping error: ") { + Error::Subtype(msg.to_string()) + } else { + Error::msg(format!("Deserialize error: {msg}")) + } + } + fn invalid_type(_: de::Unexpected<'_>, exp: &dyn de::Expected) -> Self { + Error::Subtype(format!("{exp}")) + } +} + +impl From for Error { + fn from(e: io::Error) -> Error { + Error::msg(format!("io error: {e}")) + } +} + +impl From for Error { + fn from(e: binread::Error) -> Error { + Error::Binread(get_binread_labels(&e)) + } +} +#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] +#[cfg(feature = "parser")] +impl From for Error { + fn from(e: ReportError) 
-> Error { + Error::msg(e) + } +} +#[cfg_attr(docsrs, doc(cfg(feature = "random")))] +#[cfg(feature = "random")] +impl From for Error { + fn from(e: arbitrary::Error) -> Error { + Error::msg(format!("arbitrary error: {e}")) + } +} + +#[cfg_attr(docsrs, doc(cfg(feature = "configs")))] +#[cfg(feature = "configs")] +impl From for Error { + fn from(e: serde_dhall::Error) -> Error { + Error::msg(format!("dhall error: {e}")) + } +} + +#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] +#[cfg(feature = "parser")] +pub fn pretty_parse(name: &str, str: &str) -> Result +where + T: std::str::FromStr, +{ + str.parse::().or_else(|e| { + let writer = StandardStream::stderr(term::termcolor::ColorChoice::Auto); + let config = term::Config::default(); + let file = SimpleFile::new(name, str); + term::emit(&mut writer.lock(), &config, &file, &e.report())?; + Err(e) + }) +} +#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] +#[cfg(feature = "parser")] +pub fn pretty_read(reader: &mut std::io::Cursor<&[u8]>) -> Result +where + T: binread::BinRead, +{ + T::read(reader).or_else(|e| { + let e = Error::from(e); + let writer = StandardStream::stderr(term::termcolor::ColorChoice::Auto); + let config = term::Config::default(); + let str = hex::encode(reader.get_ref()); + let file = SimpleFile::new("binary", &str); + term::emit(&mut writer.lock(), &config, &file, &e.report())?; + Err(e) + }) +} diff --git a/rust/candid_parser/src/lib.rs b/rust/candid_parser/src/lib.rs new file mode 100644 index 00000000..446735af --- /dev/null +++ b/rust/candid_parser/src/lib.rs @@ -0,0 +1,381 @@ +//! # Candid +//! +//! Candid is an interface description language (IDL) for interacting with _canisters_ (also known as _services_ or _actors_) running on the Internet Computer. +//! +//! There are three common ways that you might find yourself needing to work with Candid in Rust. +//! - As a typed Rust data structure. 
When you write canisters or frontend in Rust, you want to have a seamless way of converting data between Rust and Candid. +//! - As an untyped Candid value. When you write generic tools for the Internet Computer without knowing the type of the Candid data. +//! - As text data. When you get the data from CLI or read from a file, you can use the provided parser to send/receive messages. +//! +//! Candid provides efficient, flexible and safe ways of converting data between each of these representations. +//! +//! ## Operating on native Rust values +//! We are using a builder pattern to encode/decode Candid messages, see [`candid::ser::IDLBuilder`](ser/struct.IDLBuilder.html) for serialization and [`candid::de::IDLDeserialize`](de/struct.IDLDeserialize.html) for deserialization. +//! +//! ``` +//! // Serialize 10 numbers to Candid binary format +//! let mut ser = candid::ser::IDLBuilder::new(); +//! for i in 0..10 { +//! ser.arg(&i)?; +//! } +//! let bytes: Vec<u8> = ser.serialize_to_vec()?; +//! +//! // Deserialize Candid message and verify the values match +//! let mut de = candid::de::IDLDeserialize::new(&bytes)?; +//! let mut i = 0; +//! while !de.is_done() { +//! let x = de.get_value::<i32>()?; +//! assert_eq!(x, i); +//! i += 1; +//! } +//! de.done()?; +//! # Ok::<(), candid::Error>(()) +//! ``` +//! +//! Candid provides functions for encoding/decoding a Candid message in a type-safe way. +//! +//! ``` +//! use candid::{encode_args, decode_args}; +//! // Serialize two values [(42, "text")] and (42u32, "text") +//! let bytes: Vec<u8> = encode_args((&[(42, "text")], &(42u32, "text")))?; +//! // Deserialize the first value as type Vec<(i32, &str)>, +//! // and the second value as type (u32, String) +//! let (a, b): (Vec<(i32, &str)>, (u32, String)) = decode_args(&bytes)?; +//! +//! assert_eq!(a, [(42, "text")]); +//! assert_eq!(b, (42u32, "text".to_string())); +//! # Ok::<(), candid::Error>(()) +//! ``` +//! +//! 
We also provide macros for encoding/decoding Candid messages in a convenient way. +//! +//! ``` +//! use candid::{Encode, Decode}; +//! // Serialize two values [(42, "text")] and (42u32, "text") +//! let bytes: Vec<u8> = Encode!(&[(42, "text")], &(42u32, "text"))?; +//! // Deserialize the first value as type Vec<(i32, &str)>, +//! // and the second value as type (u32, String) +//! let (a, b) = Decode!(&bytes, Vec<(i32, &str)>, (u32, String))?; +//! +//! assert_eq!(a, [(42, "text")]); +//! assert_eq!(b, (42u32, "text".to_string())); +//! # Ok::<(), candid::Error>(()) +//! ``` +//! +//! The [`Encode!`](macro.Encode.html) macro takes a sequence of Rust values, and returns a binary format `Vec<u8>` that can be sent over the wire. +//! The [`Decode!`](macro.Decode.html) macro takes the binary message and a sequence of Rust types that you want to decode into, and returns a tuple +//! of Rust values of the given types. +//! +//! Note that a fixed Candid message may be decoded into multiple Rust types. For example, +//! we can decode a Candid `text` type into either `String` or `&str` in Rust. +//! +//! ## Operating on user defined struct/enum +//! We use trait [`CandidType`](types/trait.CandidType.html) for serialization. Deserialization requires both [`CandidType`](types/trait.CandidType.html) and Serde's [`Deserialize`](trait.Deserialize.html) trait. +//! Any type that implements these two traits can be used for serialization and deserialization. +//! This includes built-in Rust standard library types like `Vec<T>` and `Result<T, E>`, as well as any structs +//! or enums annotated with `#[derive(CandidType, Deserialize)]`. +//! +//! We do not use Serde's `Serialize` trait because Candid requires serializing types along with the values. +//! This is difficult to achieve in `Serialize`, especially for enum types. Besides serialization, [`CandidType`](types/trait.CandidType.html) +//! 
trait also converts Rust type to Candid type defined as [`candid::types::Type`](types/internal/enum.Type.html). +//! ``` +//! use candid::{Encode, Decode, CandidType, Deserialize}; +//! #[derive(CandidType, Deserialize)] +//! # #[derive(Debug, PartialEq)] +//! enum List { +//! #[serde(rename = "nil")] +//! Nil, +//! #[serde(with = "serde_bytes")] +//! Node(Vec<u8>), +//! Cons(i32, Box<List>), +//! } +//! let list = List::Cons(42, Box::new(List::Nil)); +//! +//! let bytes = Encode!(&list)?; +//! let res = Decode!(&bytes, List)?; +//! assert_eq!(res, list); +//! # Ok::<(), candid::Error>(()) +//! ``` +//! We support serde's rename attributes for each field, namely `#[serde(rename = "foo")]` +//! and `#[serde(rename(serialize = "foo", deserialize = "foo"))]`. +//! This is useful when interoperating between Rust and Motoko canisters involving variant types, because +//! they use different naming conventions for field names. +//! +//! We support `#[serde(with = "serde_bytes")]` for efficient handling of `&[u8]` and `Vec<u8>`. You can +//! also use the wrapper types `serde_bytes::ByteBuf` and `serde_bytes::Bytes`. +//! +//! Note that if you are deriving `Deserialize` trait from Candid, you need to import `serde` as a dependency in +//! your project, as the derived implementation will refer to the `serde` crate. +//! +//! ## Operating on big integers +//! To support big integer types [`candid::Int`](types/number/struct.Int.html) and [`candid::Nat`](types/number/struct.Nat.html), +//! we use the `num_bigint` crate. We provide an interface to convert `i64`, `u64`, `&str` and `&[u8]` to big integers. +//! You can also use `i128` and `u128` to represent Candid `int` and `nat` types respectively (decoding will fail if +//! the number is more than 128 bits). +//! ``` +//! use candid::{Int, Nat, Encode, Decode}; +//! let x = "-10000000000000000000".parse::<Int>()?; +//! let bytes = Encode!(&Nat::from(1024), &x)?; +//! let (a, b) = Decode!(&bytes, Nat, Int)?; +//! 
let (c, d) = Decode!(&bytes, u128, i128)?; +//! assert_eq!(a + 1, 1025); +//! assert_eq!(b, Int::parse(b"-10000000000000000000")?); +//! # Ok::<(), candid::Error>(()) +//! ``` +//! +//! ## Operating on reference types +//! The type of function and service references cannot be derived automatically. We provide +//! two macros [`define_function!`](macro.define_function.html) and [`define_service!`](macro.define_service.html) to help define the reference types. To specify reference types in the macro, you need to use the corresponding Rust types, +//! instead of the Candid types. +//! +//! ``` +//! use candid::{define_function, define_service, func, Encode, Decode, Principal}; +//! let principal = Principal::from_text("aaaaa-aa").unwrap(); +//! +//! define_function!(pub CustomFunc : (u8, &str) -> (u128)); +//! let func = CustomFunc::new(principal, "method_name".to_string()); +//! assert_eq!(func, Decode!(&Encode!(&func)?, CustomFunc)?); +//! +//! define_service!(MyService : { +//! "f": CustomFunc::ty(); +//! "g": func!((candid::Int) -> (candid::Nat, CustomFunc) query) +//! }); +//! let serv = MyService::new(principal); +//! assert_eq!(serv, Decode!(&Encode!(&serv)?, MyService)?); +//! # Ok::<(), candid::Error>(()) +//! ``` +//! +//! ## Operating on untyped Candid values +//! Any valid Candid value can be manipulated in a recursive enum representation [`candid::types::value::IDLValue`](types/value/enum.IDLValue.html). +//! We use `ser.value_arg(v)` and `de.get_value::<IDLValue>()` for encoding and decoding the value. +//! The use of Rust value and `IDLValue` can be intermixed. +//! +//! ``` +//! use candid::types::value::IDLValue; +//! // Serialize Rust value Some(42u8) and IDLValue "hello" +//! let bytes = candid::ser::IDLBuilder::new() +//! .arg(&Some(42u8))? +//! .value_arg(&IDLValue::Text("hello".to_string()))? +//! .serialize_to_vec()?; +//! +//! // Deserialize the first Rust value into IDLValue, +//! // and the second IDLValue into Rust value +//! 
let mut de = candid::de::IDLDeserialize::new(&bytes)?; +//! let x = de.get_value::<IDLValue>()?; +//! let y = de.get_value::<&str>()?; +//! de.done()?; +//! +//! assert_eq!(x, IDLValue::Opt(Box::new(IDLValue::Nat8(42)))); +//! assert_eq!(y, "hello"); +//! # Ok::<(), candid::Error>(()) +//! ``` +//! +//! We provide a data structure [`candid::IDLArgs`](types/value/struct.IDLArgs.html) to represent a sequence of `IDLValue`s, +//! and use `to_bytes()` and `from_bytes()` to encode and decode Candid messages. +//! We also provide a parser to parse Candid values in text format. +//! +//! ``` +//! #[cfg(feature = "parser")] +//! # fn f() -> Result<(), candid::Error> { +//! use candid::{IDLArgs, TypeEnv}; +//! // Candid values represented in text format +//! let text_value = r#" +//! (42, opt true, vec {1;2;3}, +//! opt record {label="text"; 42="haha"}) +//! "#; +//! +//! // Parse text format into IDLArgs for serialization +//! let args: IDLArgs = text_value.parse()?; +//! let encoded: Vec<u8> = args.to_bytes()?; +//! +//! // Deserialize into IDLArgs +//! let decoded: IDLArgs = IDLArgs::from_bytes(&encoded)?; +//! assert_eq!(encoded, decoded.to_bytes()?); +//! +//! // Convert IDLArgs to text format +//! let output: String = decoded.to_string(); +//! let parsed_args: IDLArgs = output.parse()?; +//! let annotated_args = args.annotate_types(true, &TypeEnv::new(), &parsed_args.get_types())?; +//! assert_eq!(annotated_args, parsed_args); +//! # Ok(()) +//! # } +//! ``` +//! Note that when parsing Candid values, we assume the number literals are always of type `Int`. +//! This can be changed by providing the type of the method arguments, which can usually be obtained +//! by parsing a Candid file, as shown in the following section. +//! +//! ## Operating on Candid AST +//! We provide a parser and type checker for Candid files specifying the service interface. +//! +//! ``` +//! #[cfg(feature = "parser")] +//! # fn f() -> Result<(), candid::Error> { +//! 
use candid::{IDLProg, TypeEnv, check_prog, types::{Type, TypeInner}}; +//! let did_file = r#" +//! type List = opt record { head: int; tail: List }; +//! type byte = nat8; +//! service : { +//! f : (byte, int, nat, int8) -> (List); +//! g : (List) -> (int) query; +//! } +//! "#; +//! +//! // Parse did file into an AST +//! let ast: IDLProg = did_file.parse()?; +//! +//! // Type checking a given .did file +//! // let (env, opt_actor) = check_file("a.did")?; +//! // Or alternatively, use check_prog to check in-memory did file +//! // Note that file import is ignored by check_prog. +//! let mut env = TypeEnv::new(); +//! let actor: Type = check_prog(&mut env, &ast)?.unwrap(); +//! +//! let method = env.get_method(&actor, "g").unwrap(); +//! assert_eq!(method.is_query(), true); +//! assert_eq!(method.args, vec![TypeInner::Var("List".to_string()).into()]); +//! # Ok(()) +//! # } +//! ``` +//! +//! ## Serializing untyped Candid values with type annotations. +//! With type signatures from the Candid file, [`candid::IDLArgs`](parser/value/struct.IDLArgs.html) +//! uses `to_bytes_with_types` function to serialize arguments directed by the Candid types. +//! This is useful when serializing different number types and recursive types. +//! There is no need to use types for deserialization as the types are available in the Candid message. +//! +//! ``` +//! #[cfg(feature = "parser")] +//! # fn f() -> Result<(), candid::Error> { +//! use candid::{IDLArgs, types::value::IDLValue}; +//! # use candid::{IDLProg, TypeEnv, check_prog}; +//! # let did_file = r#" +//! # type List = opt record { head: int; tail: List }; +//! # type byte = nat8; +//! # service : { +//! # f : (byte, int, nat, int8) -> (List); +//! # g : (List) -> (int) query; +//! # } +//! # "#; +//! # let ast = did_file.parse::()?; +//! # let mut env = TypeEnv::new(); +//! # let actor = check_prog(&mut env, &ast)?.unwrap(); +//! // Get method type f : (byte, int, nat, int8) -> (List) +//! 
let method = env.get_method(&actor, "f").unwrap(); +//! let args = "(42, 42, 42, 42)".parse::()?; +//! // Serialize arguments with candid types +//! let encoded = args.to_bytes_with_types(&env, &method.args)?; +//! let decoded = IDLArgs::from_bytes(&encoded)?; +//! assert_eq!(decoded.args, +//! vec![IDLValue::Nat8(42), +//! IDLValue::Int(42.into()), +//! IDLValue::Nat(42.into()), +//! IDLValue::Int8(42) +//! ]); +//! # Ok(()) +//! # } +//! ``` +//! +//! ## Building the library as a JS/Wasm package +//! With the help of `wasm-bindgen` and `wasm-pack`, we can build the library as a Wasm binary and run in the browser. +//! This is useful for client-side UIs and parsing did files in JavaScript. +//! +//! Create a new project with the following `Cargo.toml`. +//! ```toml +//! [lib] +//! crate-type = ["cdylib"] +//! +//! [dependencies] +//! wasm-bindgen = "0.2" +//! candid = "0.9.0" +//! +//! [profile.release] +//! lto = true +//! opt-level = 'z' +//! ``` +//! Expose the methods in `lib.rs` +//! ```ignore +//! use candid::{check_prog, IDLProg, TypeEnv}; +//! use wasm_bindgen::prelude::*; +//! #[wasm_bindgen] +//! pub fn did_to_js(prog: String) -> Option { +//! let ast = prog.parse::().ok()?; +//! let mut env = TypeEnv::new(); +//! let actor = check_prog(&mut env, &ast).ok()?; +//! Some(candid::bindings::javascript::compile(&env, &actor)) +//! } +//! ``` +//! ### Building +//! ```shell +//! cargo install wasm-pack +//! wasm-pack build --target bundler +//! wasm-opt --strip-debug -Oz pkg/didc_bg.wasm -o pkg/didc_bg.wasm +//! ``` +//! ### Usage +//! ```js +//! const didc = import("pkg/didc"); +//! didc.then((mod) => { +//! const service = "service : {}"; +//! const js = mod.did_to_js(service); +//! }); +//! ``` +//! +//! 
+ +// only enables the `doc_cfg` feature when +// the `docsrs` configuration attribute is defined +#![cfg_attr(docsrs, feature(doc_cfg))] + +pub use candid_derive::{candid_method, export_service, CandidType}; +pub use serde::Deserialize; + +pub mod error; +pub use error::{Error, Result}; + +pub mod types; +pub use types::CandidType; +pub use types::{ + arc, + number::{Int, Nat}, + principal::Principal, + rc, + reference::{Func, Service}, + reserved::{Empty, Reserved}, + value::{IDLArgs, IDLValue}, + TypeEnv, +}; + +#[allow(dead_code)] +pub mod binary_parser; +pub mod de; +pub mod ser; + +pub mod utils; +pub use utils::{decode_args, decode_one, encode_args, encode_one, write_args}; +pub mod pretty; + +#[cfg_attr(docsrs, doc(cfg(feature = "parser")))] +#[cfg(feature = "parser")] +pub mod parser; +#[cfg(feature = "parser")] +pub use error::{pretty_parse, pretty_read}; +#[cfg(feature = "parser")] +pub use parser::{ + types::IDLProg, + typing::{check_file, check_prog, pretty_check_file}, +}; + +pub mod bindings; + +// Candid hash function comes from +// https://caml.inria.fr/pub/papers/garrigue-polymorphic_variants-ml98.pdf +// Not public API. Only used by tests. +// Remember to update the same function in candid_derive if you change this function. 
+#[doc(hidden)] +#[inline] +pub fn idl_hash(id: &str) -> u32 { + let mut s: u32 = 0; + for c in id.as_bytes().iter() { + s = s.wrapping_mul(223).wrapping_add(*c as u32); + } + s +} diff --git a/rust/candid/src/parser/configs.rs b/rust/candid_parser/src/parser/configs.rs similarity index 100% rename from rust/candid/src/parser/configs.rs rename to rust/candid_parser/src/parser/configs.rs diff --git a/rust/candid/src/parser/grammar.lalrpop b/rust/candid_parser/src/parser/grammar.lalrpop similarity index 100% rename from rust/candid/src/parser/grammar.lalrpop rename to rust/candid_parser/src/parser/grammar.lalrpop diff --git a/rust/candid/src/parser/grammar.rs b/rust/candid_parser/src/parser/grammar.rs similarity index 100% rename from rust/candid/src/parser/grammar.rs rename to rust/candid_parser/src/parser/grammar.rs diff --git a/rust/candid/src/parser/mod.rs b/rust/candid_parser/src/parser/mod.rs similarity index 100% rename from rust/candid/src/parser/mod.rs rename to rust/candid_parser/src/parser/mod.rs diff --git a/rust/candid/src/parser/random.rs b/rust/candid_parser/src/parser/random.rs similarity index 100% rename from rust/candid/src/parser/random.rs rename to rust/candid_parser/src/parser/random.rs diff --git a/rust/candid/src/parser/test.rs b/rust/candid_parser/src/parser/test.rs similarity index 100% rename from rust/candid/src/parser/test.rs rename to rust/candid_parser/src/parser/test.rs diff --git a/rust/candid/src/parser/token.rs b/rust/candid_parser/src/parser/token.rs similarity index 100% rename from rust/candid/src/parser/token.rs rename to rust/candid_parser/src/parser/token.rs diff --git a/rust/candid/src/parser/types.rs b/rust/candid_parser/src/parser/types.rs similarity index 100% rename from rust/candid/src/parser/types.rs rename to rust/candid_parser/src/parser/types.rs diff --git a/rust/candid/src/parser/typing.rs b/rust/candid_parser/src/parser/typing.rs similarity index 100% rename from rust/candid/src/parser/typing.rs rename to 
rust/candid_parser/src/parser/typing.rs diff --git a/rust/candid_parser/src/pretty.rs b/rust/candid_parser/src/pretty.rs new file mode 100644 index 00000000..937a0885 --- /dev/null +++ b/rust/candid_parser/src/pretty.rs @@ -0,0 +1,89 @@ +use pretty::RcDoc; + +pub const INDENT_SPACE: isize = 2; +pub const LINE_WIDTH: usize = 80; + +fn is_empty(doc: &RcDoc) -> bool { + use pretty::Doc::*; + match &**doc { + Nil => true, + FlatAlt(t1, t2) => is_empty(t1) && is_empty(t2), + Group(t) => is_empty(t), + Nest(_, t) => is_empty(t), + Union(t1, t2) => is_empty(t1) && is_empty(t2), + Annotated(_, t) => is_empty(t), + _ => false, + } +} + +pub fn enclose<'a>(left: &'a str, doc: RcDoc<'a>, right: &'a str) -> RcDoc<'a> { + if is_empty(&doc) { + RcDoc::text(left).append(right) + } else { + RcDoc::text(left) + .append(RcDoc::line_()) + .append(doc) + .nest(INDENT_SPACE) + .append(RcDoc::line_()) + .append(right) + .group() + } +} + +pub fn enclose_space<'a>(left: &'a str, doc: RcDoc<'a>, right: &'a str) -> RcDoc<'a> { + if is_empty(&doc) { + RcDoc::text(left).append(right) + } else { + RcDoc::text(left) + .append(RcDoc::line()) + .append(doc) + .nest(INDENT_SPACE) + .append(RcDoc::line()) + .append(right) + .group() + } +} + +/// Intersperse the separator between each item in `docs`. +pub fn strict_concat<'a, D>(docs: D, sep: &'a str) -> RcDoc<'a> +where + D: Iterator>, +{ + RcDoc::intersperse(docs, RcDoc::text(sep).append(RcDoc::line())) +} + +/// Append the separator to each item in `docs`. If it is displayed in a single line, omit the last separator. 
+pub fn concat<'a, D>(docs: D, sep: &'a str) -> RcDoc<'a> +where + D: Iterator> + Clone, +{ + RcDoc::intersperse(docs.clone().map(|d| d.append(sep)), RcDoc::line()).flat_alt( + RcDoc::intersperse(docs, RcDoc::text(sep).append(RcDoc::line())), + ) +} + +pub fn lines<'a, D>(docs: D) -> RcDoc<'a> +where + D: Iterator>, +{ + RcDoc::concat(docs.map(|doc| doc.append(RcDoc::hardline()))) +} + +pub fn kwd(str: &U) -> RcDoc { + RcDoc::as_string(str).append(RcDoc::space()) +} + +pub fn str(str: &str) -> RcDoc { + RcDoc::text(str) +} + +pub fn ident(id: &str) -> RcDoc { + kwd(id) +} + +pub fn quote_ident(id: &str) -> RcDoc { + str("'") + .append(format!("{}", id.escape_debug())) + .append("'") + .append(RcDoc::space()) +} diff --git a/rust/candid_parser/src/ser.rs b/rust/candid_parser/src/ser.rs new file mode 100644 index 00000000..e36e7032 --- /dev/null +++ b/rust/candid_parser/src/ser.rs @@ -0,0 +1,411 @@ +//! Serialize a Rust data structure to Candid binary format + +use super::error::{Error, Result}; +use super::types; +use super::types::value::IDLValue; +use super::types::{internal::Opcode, Field, Type, TypeEnv, TypeInner}; +use byteorder::{LittleEndian, WriteBytesExt}; +use leb128::write::{signed as sleb128_encode, unsigned as leb128_encode}; +use std::collections::HashMap; +use std::io; +use std::vec::Vec; + +/// Use this struct to serialize a sequence of Rust values (heterogeneous) to IDL binary message. +#[derive(Default)] +pub struct IDLBuilder { + type_ser: TypeSerialize, + value_ser: ValueSerializer, +} + +impl IDLBuilder { + pub fn new() -> Self { + // We cannot share the memo table across different Builder. Because the same Rust + // type can map to a different but equivalent candid type for different builder, + // due to memo match happening in different time/order. 
+ types::internal::env_clear(); + IDLBuilder { + type_ser: TypeSerialize::new(), + value_ser: ValueSerializer::new(), + } + } + pub fn arg<'a, T: types::CandidType>(&'a mut self, value: &T) -> Result<&'a mut Self> { + self.type_ser.push_type(&T::ty())?; + value.idl_serialize(&mut self.value_ser)?; + Ok(self) + } + pub fn value_arg<'a>(&'a mut self, value: &IDLValue) -> Result<&'a mut Self> { + use super::CandidType; + self.type_ser.push_type(&value.value_ty())?; + value.idl_serialize(&mut self.value_ser)?; + Ok(self) + } + /// Annotate IDLValue with (TypeEnv, Type). Note that the TypeEnv will be added to the serializer state. + /// If the Type can already be resolved by previous TypeEnvs, you don't need to pass TypeEnv again. + pub fn value_arg_with_type<'a>( + &'a mut self, + value: &IDLValue, + env: &TypeEnv, + t: &Type, + ) -> Result<&'a mut Self> { + use super::CandidType; + let env = self.type_ser.env.merge(env)?; + let v = value.annotate_type(true, env, t)?; + self.type_ser.push_type(t)?; + v.idl_serialize(&mut self.value_ser)?; + Ok(self) + } + pub fn serialize(&mut self, mut writer: W) -> Result<()> { + writer.write_all(b"DIDL")?; + self.type_ser.serialize()?; + writer.write_all(self.type_ser.get_result())?; + writer.write_all(self.value_ser.get_result())?; + Ok(()) + } + pub fn serialize_to_vec(&mut self) -> Result> { + let mut vec = Vec::new(); + self.serialize(&mut vec)?; + Ok(vec) + } +} + +/// A structure for serializing Rust values to IDL. +#[derive(Default)] +pub struct ValueSerializer { + value: Vec, +} + +impl ValueSerializer { + /// Creates a new IDL serializer. 
+ #[inline] + pub fn new() -> Self { + ValueSerializer { value: Vec::new() } + } + pub fn get_result(&self) -> &[u8] { + &self.value + } + #[doc(hidden)] + pub fn write_leb128(&mut self, value: u64) -> Result<()> { + leb128_encode(&mut self.value, value)?; + Ok(()) + } + #[doc(hidden)] + pub fn write(&mut self, bytes: &[u8]) -> Result<()> { + use std::io::Write; + self.value.write_all(bytes)?; + Ok(()) + } +} + +macro_rules! serialize_num { + ($name:ident, $ty:ty, $($method:tt)*) => { + paste::item! { + fn [](self, v: $ty) -> Result<()> { + self.value.$($method)*(v)?; + Ok(()) + } + } + }; +} + +impl<'a> types::Serializer for &'a mut ValueSerializer { + type Error = Error; + type Compound = Compound<'a>; + fn serialize_bool(self, v: bool) -> Result<()> { + self.write(&[v as u8])?; + Ok(()) + } + fn serialize_int(self, v: &crate::Int) -> Result<()> { + v.encode(&mut self.value) + } + fn serialize_nat(self, v: &crate::Nat) -> Result<()> { + v.encode(&mut self.value) + } + serialize_num!(nat8, u8, write_u8); + serialize_num!(nat16, u16, write_u16::); + serialize_num!(nat32, u32, write_u32::); + serialize_num!(nat64, u64, write_u64::); + + serialize_num!(int8, i8, write_i8); + serialize_num!(int16, i16, write_i16::); + serialize_num!(int32, i32, write_i32::); + serialize_num!(int64, i64, write_i64::); + + serialize_num!(float32, f32, write_f32::); + serialize_num!(float64, f64, write_f64::); + + fn serialize_text(self, v: &str) -> Result<()> { + let mut buf = Vec::from(v.as_bytes()); + self.write_leb128(buf.len() as u64)?; + self.value.append(&mut buf); + Ok(()) + } + fn serialize_null(self, _v: ()) -> Result<()> { + Ok(()) + } + fn serialize_empty(self) -> Result<()> { + Err(Error::msg("cannot encode empty type")) + } + fn serialize_principal(self, blob: &[u8]) -> Result<()> { + self.write(&[1])?; + self.write_leb128(blob.len() as u64)?; + self.write(blob)?; + Ok(()) + } + fn serialize_function(self, blob: &[u8], meth: &str) -> Result<()> { + self.write(&[1])?; + 
self.serialize_principal(blob)?; + self.serialize_text(meth) + } + fn serialize_option(self, v: Option<&T>) -> Result<()> + where + T: super::CandidType, + { + match v { + None => { + self.write_leb128(0)?; + Ok(()) + } + Some(v) => { + self.write_leb128(1)?; + v.idl_serialize(self) + } + } + } + fn serialize_variant(self, index: u64) -> Result { + self.write_leb128(index)?; + Ok(Self::Compound { ser: self }) + } + fn serialize_struct(self) -> Result { + Ok(Self::Compound { ser: self }) + } + fn serialize_vec(self, len: usize) -> Result { + self.write_leb128(len as u64)?; + Ok(Self::Compound { ser: self }) + } + fn serialize_blob(self, blob: &[u8]) -> Result<()> { + self.write_leb128(blob.len() as u64)?; + self.write(blob)?; + Ok(()) + } +} + +pub struct Compound<'a> { + ser: &'a mut ValueSerializer, +} +impl<'a> types::Compound for Compound<'a> { + type Error = Error; + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: types::CandidType, + { + value.idl_serialize(&mut *self.ser)?; + Ok(()) + } + fn serialize_blob(&mut self, blob: &[u8]) -> Result<()> { + use crate::types::Serializer; + self.ser.serialize_blob(blob) + } +} + +/// A structure for serializing Rust values to IDL types. +#[derive(Default)] +pub struct TypeSerialize { + type_table: Vec>, + type_map: HashMap, + env: TypeEnv, + args: Vec, + result: Vec, +} + +impl TypeSerialize { + #[inline] + pub fn new() -> Self { + TypeSerialize { + type_table: Vec::new(), + type_map: HashMap::new(), + env: TypeEnv::new(), + args: Vec::new(), + result: Vec::new(), + } + } + pub fn get_result(&self) -> &[u8] { + &self.result + } + #[inline] + fn build_type(&mut self, t: &Type) -> Result<()> { + if self.type_map.contains_key(t) { + return Ok(()); + } + let actual_type = if let TypeInner::Var(id) = t.as_ref() { + self.env.rec_find_type(id)? 
+ } else { + t + } + .clone(); + if types::internal::is_primitive(&actual_type) { + return Ok(()); + } + // This is a hack to remove (some) equivalent mu types + // from the type table. + // Someone should implement Pottier's O(nlogn) algorithm + // http://gallium.inria.fr/~fpottier/publis/gauthier-fpottier-icfp04.pdf + let unrolled = types::internal::unroll(t); + if let Some(idx) = self.type_map.get(&unrolled) { + let idx = *idx; + self.type_map.insert(t.clone(), idx); + return Ok(()); + } + + let idx = self.type_table.len(); + self.type_map.insert(t.clone(), idx as i32); + self.type_table.push(Vec::new()); + let mut buf = Vec::new(); + match actual_type.as_ref() { + TypeInner::Opt(ref ty) => { + self.build_type(ty)?; + sleb128_encode(&mut buf, Opcode::Opt as i64)?; + self.encode(&mut buf, ty)?; + } + TypeInner::Vec(ref ty) => { + self.build_type(ty)?; + sleb128_encode(&mut buf, Opcode::Vec as i64)?; + self.encode(&mut buf, ty)?; + } + TypeInner::Record(fs) => { + for Field { ty, .. } in fs.iter() { + self.build_type(ty)?; + } + + sleb128_encode(&mut buf, Opcode::Record as i64)?; + leb128_encode(&mut buf, fs.len() as u64)?; + for Field { id, ty } in fs.iter() { + leb128_encode(&mut buf, u64::from(id.get_id()))?; + self.encode(&mut buf, ty)?; + } + } + TypeInner::Variant(fs) => { + for Field { ty, .. 
} in fs.iter() { + self.build_type(ty)?; + } + + sleb128_encode(&mut buf, Opcode::Variant as i64)?; + leb128_encode(&mut buf, fs.len() as u64)?; + for Field { id, ty } in fs.iter() { + leb128_encode(&mut buf, u64::from(id.get_id()))?; + self.encode(&mut buf, ty)?; + } + } + TypeInner::Service(ref ms) => { + for (_, ty) in ms.iter() { + self.build_type(ty)?; + } + sleb128_encode(&mut buf, Opcode::Service as i64)?; + leb128_encode(&mut buf, ms.len() as u64)?; + for (id, ty) in ms.iter() { + let mut name = Vec::from(id.as_bytes()); + leb128_encode(&mut buf, name.len() as u64)?; + buf.append(&mut name); + self.encode(&mut buf, ty)?; + } + } + TypeInner::Func(ref func) => { + for ty in func.args.iter().chain(func.rets.iter()) { + self.build_type(ty)?; + } + sleb128_encode(&mut buf, Opcode::Func as i64)?; + leb128_encode(&mut buf, func.args.len() as u64)?; + for ty in func.args.iter() { + self.encode(&mut buf, ty)?; + } + leb128_encode(&mut buf, func.rets.len() as u64)?; + for ty in func.rets.iter() { + self.encode(&mut buf, ty)?; + } + leb128_encode(&mut buf, func.modes.len() as u64)?; + for m in func.modes.iter() { + use crate::types::FuncMode; + let m = match m { + FuncMode::Query => 1, + FuncMode::Oneway => 2, + FuncMode::CompositeQuery => 3, + }; + sleb128_encode(&mut buf, m)?; + } + } + _ => unreachable!(), + }; + self.type_table[idx] = buf; + Ok(()) + } + #[doc(hidden)] + pub fn push_type(&mut self, t: &Type) -> Result<()> { + self.args.push(t.clone()); + self.build_type(t) + } + fn encode(&self, buf: &mut Vec, t: &Type) -> Result<()> { + if let TypeInner::Var(id) = t.as_ref() { + let actual_type = self.env.rec_find_type(id)?; + if types::internal::is_primitive(actual_type) { + return self.encode(buf, actual_type); + } + } + match t.as_ref() { + TypeInner::Null => sleb128_encode(buf, Opcode::Null as i64), + TypeInner::Bool => sleb128_encode(buf, Opcode::Bool as i64), + TypeInner::Nat => sleb128_encode(buf, Opcode::Nat as i64), + TypeInner::Int => 
sleb128_encode(buf, Opcode::Int as i64), + TypeInner::Nat8 => sleb128_encode(buf, Opcode::Nat8 as i64), + TypeInner::Nat16 => sleb128_encode(buf, Opcode::Nat16 as i64), + TypeInner::Nat32 => sleb128_encode(buf, Opcode::Nat32 as i64), + TypeInner::Nat64 => sleb128_encode(buf, Opcode::Nat64 as i64), + TypeInner::Int8 => sleb128_encode(buf, Opcode::Int8 as i64), + TypeInner::Int16 => sleb128_encode(buf, Opcode::Int16 as i64), + TypeInner::Int32 => sleb128_encode(buf, Opcode::Int32 as i64), + TypeInner::Int64 => sleb128_encode(buf, Opcode::Int64 as i64), + TypeInner::Float32 => sleb128_encode(buf, Opcode::Float32 as i64), + TypeInner::Float64 => sleb128_encode(buf, Opcode::Float64 as i64), + TypeInner::Text => sleb128_encode(buf, Opcode::Text as i64), + TypeInner::Reserved => sleb128_encode(buf, Opcode::Reserved as i64), + TypeInner::Empty => sleb128_encode(buf, Opcode::Empty as i64), + TypeInner::Principal => sleb128_encode(buf, Opcode::Principal as i64), + TypeInner::Knot(ref id) => { + let ty = types::internal::find_type(id) + .ok_or_else(|| Error::msg("knot TypeId not found"))?; + let idx = self + .type_map + .get(&ty) + .ok_or_else(|| Error::msg(format!("knot type {ty} not found")))?; + sleb128_encode(buf, i64::from(*idx)) + } + TypeInner::Var(_) => { + let idx = self + .type_map + .get(t) + .ok_or_else(|| Error::msg(format!("var type {t} not found")))?; + sleb128_encode(buf, i64::from(*idx)) + } + TypeInner::Future => unreachable!(), + _ => { + let idx = self + .type_map + .get(t) + .ok_or_else(|| Error::msg(format!("type {t} not found")))?; + sleb128_encode(buf, i64::from(*idx)) + } + }?; + Ok(()) + } + #[doc(hidden)] + pub fn serialize(&mut self) -> Result<()> { + leb128_encode(&mut self.result, self.type_table.len() as u64)?; + self.result.append(&mut self.type_table.concat()); + + leb128_encode(&mut self.result, self.args.len() as u64)?; + let mut ty_encode = Vec::new(); + for t in self.args.iter() { + self.encode(&mut ty_encode, t)?; + } + 
self.result.append(&mut ty_encode); + Ok(()) + } +} diff --git a/rust/candid_parser/src/types/arc.rs b/rust/candid_parser/src/types/arc.rs new file mode 100644 index 00000000..3d8214d9 --- /dev/null +++ b/rust/candid_parser/src/types/arc.rs @@ -0,0 +1,28 @@ +//! This module provides functions to serialize and deserialize types +//! under [std::sync::Arc] shared reference type. +//! +//! # Examples +//! +//! ``` +//! use candid::{CandidType, Deserialize}; +//! use serde_bytes::ByteBuf; +//! use std::sync::Arc; +//! +//! #[derive(CandidType, Deserialize, PartialEq)] +//! struct ArcBytes(#[serde(with = "candid::arc")] Arc); +//! ``` +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::sync::Arc; + +pub fn serialize( + data: &Arc, + serializer: S, +) -> Result { + T::serialize(data, serializer) +} + +pub fn deserialize<'de, T: Deserialize<'de>, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + T::deserialize(deserializer).map(Arc::new) +} diff --git a/rust/candid_parser/src/types/impls.rs b/rust/candid_parser/src/types/impls.rs new file mode 100644 index 00000000..3caf1077 --- /dev/null +++ b/rust/candid_parser/src/types/impls.rs @@ -0,0 +1,538 @@ +use super::internal::*; +use super::{CandidType, Compound, Serializer}; + +macro_rules! 
primitive_impl { + ($t:ty, $id:tt, $method:ident $($cast:tt)*) => { + impl CandidType for $t { + fn _ty() -> Type { TypeInner::$id.into() } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> where S: Serializer { + serializer.$method(*self $($cast)*) + } + } + }; +} + +primitive_impl!((), Null, serialize_null); +primitive_impl!(bool, Bool, serialize_bool); + +primitive_impl!(i8, Int8, serialize_int8); +primitive_impl!(i16, Int16, serialize_int16); +primitive_impl!(i32, Int32, serialize_int32); +primitive_impl!(i64, Int64, serialize_int64); + +primitive_impl!(u8, Nat8, serialize_nat8); +primitive_impl!(u16, Nat16, serialize_nat16); +primitive_impl!(u32, Nat32, serialize_nat32); +primitive_impl!(u64, Nat64, serialize_nat64); + +primitive_impl!(f32, Float32, serialize_float32); +primitive_impl!(f64, Float64, serialize_float64); + +// isize, usize always encode to 64bit to ensure the same behavior +// on different platforms. This is consistent with serde's convention +primitive_impl!(isize, Int64, serialize_int64 as i64); +primitive_impl!(usize, Nat64, serialize_nat64 as u64); + +impl CandidType for i128 { + fn _ty() -> Type { + TypeInner::Int.into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + serializer.serialize_int(&crate::Int::from(*self)) + } +} +impl CandidType for u128 { + fn _ty() -> Type { + TypeInner::Nat.into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + serializer.serialize_nat(&crate::Nat::from(*self)) + } +} + +impl CandidType for String { + fn _ty() -> Type { + TypeInner::Text.into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + serializer.serialize_text(self) + } +} +impl CandidType for str { + fn _ty() -> Type { + TypeInner::Text.into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + serializer.serialize_text(self) + } +} + 
+impl CandidType for std::path::Path { + fn _ty() -> Type { + TypeInner::Text.into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + use serde::ser::Error; + match self.to_str() { + Some(s) => s.idl_serialize(serializer), + None => Err(S::Error::custom("path contains invalid UTF-8 characters")), + } + } +} + +impl CandidType for std::path::PathBuf { + fn _ty() -> Type { + TypeInner::Text.into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + self.as_path().idl_serialize(serializer) + } +} + +impl CandidType for Option +where + T: CandidType, +{ + fn _ty() -> Type { + TypeInner::Opt(T::ty()).into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + serializer.serialize_option(self.as_ref()) + } +} + +impl CandidType for [T] +where + T: CandidType, +{ + fn _ty() -> Type { + TypeInner::Vec(T::ty()).into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + let mut ser = serializer.serialize_vec(self.len())?; + for e in self.iter() { + Compound::serialize_element(&mut ser, &e)?; + } + Ok(()) + } +} +impl CandidType for serde_bytes::ByteBuf { + fn _ty() -> Type { + TypeInner::Vec(TypeInner::Nat8.into()).into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + serializer.serialize_blob(self.as_slice()) + } +} +impl CandidType for serde_bytes::Bytes { + fn _ty() -> Type { + TypeInner::Vec(TypeInner::Nat8.into()).into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + serializer.serialize_blob(self) + } +} + +macro_rules! 
map_impl { + ($ty:ident < K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound:ident)* >) => { + impl CandidType for $ty + where + K: CandidType $(+ $kbound1 $(+ $kbound2)*)*, + V: CandidType, + $($typaram: $bound,)* + { + fn _ty() -> Type { + let tuple = TypeInner::Record(vec![ + Field { + id: Label::Id(0).into(), + ty: K::ty(), + }, + Field { + id: Label::Id(1).into(), + ty: V::ty(), + }, + ]).into(); + TypeInner::Vec(tuple).into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + let mut ser = serializer.serialize_vec(self.len())?; + for e in self.iter() { + Compound::serialize_element(&mut ser, &e)?; + } + Ok(()) + } + } + } +} +macro_rules! seq_impl { + ($ty:ident < K $(: $kbound1:ident $(+ $kbound2:ident)*)* $(, $typaram:ident : $bound:ident)* >) => { + impl CandidType for $ty + where + K: CandidType $(+ $kbound1 $(+ $kbound2)*)*, + $($typaram: $bound,)* + { + fn _ty() -> Type { + TypeInner::Vec(K::ty()).into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + let mut ser = serializer.serialize_vec(self.len())?; + for e in self.iter() { + Compound::serialize_element(&mut ser, &e)?; + } + Ok(()) + } + } + } +} +use std::collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, LinkedList, VecDeque}; +use std::hash::{BuildHasher, Hash}; +map_impl!(BTreeMap); +map_impl!(HashMap); + +seq_impl!(Vec); +seq_impl!(VecDeque); +seq_impl!(LinkedList); +seq_impl!(BinaryHeap); +seq_impl!(BTreeSet); +seq_impl!(HashSet); + +impl CandidType for [T; N] { + fn _ty() -> Type { + TypeInner::Vec(T::ty()).into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + let mut ser = serializer.serialize_vec(N)?; + for e in self.iter() { + Compound::serialize_element(&mut ser, &e)?; + } + Ok(()) + } +} + +impl CandidType for Result +where + T: CandidType, + E: CandidType, +{ + fn _ty() -> Type { + 
TypeInner::Variant(vec![ + // Make sure the field id is sorted by idl_hash + Field { + id: Label::Named("Ok".to_owned()).into(), + ty: T::ty(), + }, + Field { + id: Label::Named("Err".to_owned()).into(), + ty: E::ty(), + }, + ]) + .into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + match *self { + Result::Ok(ref v) => { + let mut ser = serializer.serialize_variant(0)?; + Compound::serialize_element(&mut ser, v) + } + Result::Err(ref e) => { + let mut ser = serializer.serialize_variant(1)?; + Compound::serialize_element(&mut ser, e) + } + } + } +} + +impl CandidType for Box +where + T: ?Sized + CandidType, +{ + fn _ty() -> Type { + T::ty() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + (**self).idl_serialize(serializer) + } +} + +impl CandidType for std::cmp::Reverse +where + T: CandidType, +{ + fn _ty() -> Type { + T::ty() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + self.0.idl_serialize(serializer) + } +} + +impl<'a, T> CandidType for &'a T +where + T: ?Sized + CandidType, +{ + fn id() -> TypeId { + TypeId::of::<&T>() + } // ignore lifetime + fn _ty() -> Type { + T::ty() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + (**self).idl_serialize(serializer) + } +} +impl<'a, T> CandidType for &'a mut T +where + T: ?Sized + CandidType, +{ + fn id() -> TypeId { + TypeId::of::<&T>() + } // ignore lifetime + fn _ty() -> Type { + T::ty() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + (**self).idl_serialize(serializer) + } +} + +impl<'a, T> CandidType for std::borrow::Cow<'a, T> +where + T: ?Sized + CandidType + ToOwned, +{ + fn _ty() -> Type { + T::ty() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + (**self).idl_serialize(serializer) + } +} + +impl CandidType 
for std::cell::Cell +where + T: CandidType + Copy, +{ + fn _ty() -> Type { + T::ty() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + self.get().idl_serialize(serializer) + } +} + +impl CandidType for std::cell::RefCell +where + T: CandidType, +{ + fn _ty() -> Type { + T::ty() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + use serde::ser::Error; + match self.try_borrow() { + Ok(v) => v.idl_serialize(serializer), + Err(_) => Err(S::Error::custom("already mutably borrowed")), + } + } +} + +impl CandidType for std::rc::Rc +where + T: CandidType, +{ + fn _ty() -> Type { + T::ty() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + self.as_ref().idl_serialize(serializer) + } +} + +impl CandidType for std::sync::Arc +where + T: CandidType, +{ + fn _ty() -> Type { + T::ty() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + self.as_ref().idl_serialize(serializer) + } +} + +macro_rules! tuple_impls { + ($($len:expr => ($($n:tt $name:ident)+))+) => { + $( + impl<$($name),+> CandidType for ($($name,)+) + where + $($name: CandidType,)+ + { + fn _ty() -> Type { + TypeInner::Record(vec![ + $(Field{ id: Label::Id($n).into(), ty: $name::ty() },)+ + ]).into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where S: Serializer, + { + let mut ser = serializer.serialize_struct()?; + $( + Compound::serialize_element(&mut ser, &self.$n)?; + )+ + Ok(()) + } + } + )+ + } +} + +tuple_impls! 
{ + 1 => (0 T0) + 2 => (0 T0 1 T1) + 3 => (0 T0 1 T1 2 T2) + 4 => (0 T0 1 T1 2 T2 3 T3) + 5 => (0 T0 1 T1 2 T2 3 T3 4 T4) + 6 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5) + 7 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5 6 T6) + 8 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5 6 T6 7 T7) + 9 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5 6 T6 7 T7 8 T8) + 10 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5 6 T6 7 T7 8 T8 9 T9) + 11 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5 6 T6 7 T7 8 T8 9 T9 10 T10) + 12 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5 6 T6 7 T7 8 T8 9 T9 10 T10 11 T11) + 13 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5 6 T6 7 T7 8 T8 9 T9 10 T10 11 T11 12 T12) + 14 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5 6 T6 7 T7 8 T8 9 T9 10 T10 11 T11 12 T12 13 T13) + 15 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5 6 T6 7 T7 8 T8 9 T9 10 T10 11 T11 12 T12 13 T13 14 T14) + 16 => (0 T0 1 T1 2 T2 3 T3 4 T4 5 T5 6 T6 7 T7 8 T8 9 T9 10 T10 11 T11 12 T12 13 T13 14 T14 15 T15) +} + +impl CandidType for std::time::SystemTime { + fn _ty() -> Type { + TypeInner::Record(vec![ + Field { + id: Label::Named("nanos_since_epoch".to_owned()).into(), + ty: u32::ty(), + }, + Field { + id: Label::Named("secs_since_epoch".to_owned()).into(), + ty: u64::ty(), + }, + ]) + .into() + } + fn idl_serialize(&self, serializer: S) -> Result<(), S::Error> + where + S: Serializer, + { + use serde::ser::Error; + + let duration_since_epoch = self + .duration_since(std::time::UNIX_EPOCH) + .map_err(|_| S::Error::custom("SystemTime must be later than UNIX_EPOCH"))?; + + let secs: u64 = duration_since_epoch.as_secs(); + let nanos: u32 = duration_since_epoch.subsec_nanos(); + + let mut ser = serializer.serialize_struct()?; + ser.serialize_element(&nanos)?; + ser.serialize_element(&secs)?; + + Ok(()) + } +} + +impl CandidType for std::time::Duration { + fn _ty() -> Type { + TypeInner::Record(vec![ + Field { + id: Label::Named("secs".to_owned()).into(), + ty: u64::ty(), + }, + Field { + id: Label::Named("nanos".to_owned()).into(), + ty: u32::ty(), + }, + ]) + .into() + } + fn idl_serialize(&self, serializer: S) 
-> Result<(), S::Error> + where + S: Serializer, + { + let secs: u64 = self.as_secs(); + let nanos: u32 = self.subsec_nanos(); + + let mut ser = serializer.serialize_struct()?; + ser.serialize_element(&secs)?; + ser.serialize_element(&nanos)?; + + Ok(()) + } +} diff --git a/rust/candid_parser/src/types/internal.rs b/rust/candid_parser/src/types/internal.rs new file mode 100644 index 00000000..f731bb9d --- /dev/null +++ b/rust/candid_parser/src/types/internal.rs @@ -0,0 +1,588 @@ +use super::CandidType; +use crate::idl_hash; +use num_enum::TryFromPrimitive; +use std::cell::RefCell; +use std::collections::HashMap; +use std::fmt; + +// This is a re-implementation of std::any::TypeId to get rid of 'static constraint. +// The current TypeId doesn't consider lifetime while computing the hash, which is +// totally fine for Candid type, as we don't care about lifetime at all. +#[derive(Debug, PartialEq, Eq, Hash, Clone)] +pub struct TypeId { + id: usize, + pub name: &'static str, +} +impl TypeId { + pub fn of() -> Self { + let name = std::any::type_name::(); + TypeId { + id: TypeId::of:: as usize, + name, + } + } +} +impl std::fmt::Display for TypeId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let name = NAME.with(|n| n.borrow_mut().get(self)); + write!(f, "{name}") + } +} +pub fn type_of(_: &T) -> TypeId { + TypeId::of::() +} + +#[derive(Default)] +struct TypeName { + type_name: HashMap, + name_index: HashMap, +} +impl TypeName { + fn get(&mut self, id: &TypeId) -> String { + match self.type_name.get(id) { + Some(n) => n.to_string(), + None => { + // The format of id.name is unspecified, and doesn't guarantee to be unique. + // Splitting by "::" is not ideal, as we can get types like std::Box, HashMap + // This is not a problem for correctness, but I may get misleading names. 
+ let name = id.name.split('<').next().unwrap(); + let name = name.rsplit("::").next().unwrap(); + let name = name + .chars() + .map(|c| if c.is_ascii_alphanumeric() { c } else { '_' }) + .collect::() + .trim_end_matches('_') + .to_string(); + let res = match self.name_index.get_mut(&name) { + None => { + self.name_index.insert(name.clone(), 0); + name + } + Some(v) => { + *v += 1; + format!("{name}_{v}") + } + }; + self.type_name.insert(id.clone(), res.clone()); + res + } + } + } +} + +/// Used for `candid_derive::export_service` to generate `TypeEnv` from `Type`. +/// +/// It performs a global rewriting of `Type` to resolve: +/// * Duplicate type names in different modules/namespaces. +/// * Give different names to instantiated polymorphic types. +/// * Find the type name of a recursive node `Knot(TypeId)` and convert to `Var` node. +/// +/// There are some drawbacks of this approach: +/// * The type name is based on `type_name::()`, whose format is unspecified and long. We use some regex to shorten the name. +/// * Several Rust types can map to the same Candid type, and we only get to remember one name (currently we choose the shortest name). As a result, some of the type names in Rust is lost. +/// * Unless we do equivalence checking, recursive types can be unrolled and assigned to multiple names. 
+#[derive(Default)] +pub struct TypeContainer { + pub env: crate::TypeEnv, +} +impl TypeContainer { + pub fn new() -> Self { + TypeContainer { + env: crate::TypeEnv::new(), + } + } + pub fn add(&mut self) -> Type { + let t = T::ty(); + self.go(&t) + } + fn go(&mut self, t: &Type) -> Type { + match t.as_ref() { + TypeInner::Opt(t) => TypeInner::Opt(self.go(t)), + TypeInner::Vec(t) => TypeInner::Vec(self.go(t)), + TypeInner::Record(fs) => { + let res: Type = TypeInner::Record( + fs.iter() + .map(|Field { id, ty }| Field { + id: id.clone(), + ty: self.go(ty), + }) + .collect(), + ) + .into(); + if t.is_tuple() { + return res; + } + let id = ID.with(|n| n.borrow().get(t).cloned()); + if let Some(id) = id { + self.env.0.insert(id.to_string(), res); + TypeInner::Var(id.to_string()) + } else { + // if the type is part of an enum, the id won't be recorded. + // we want to inline the type in this case. + return res; + } + } + TypeInner::Variant(fs) => { + let res: Type = TypeInner::Variant( + fs.iter() + .map(|Field { id, ty }| Field { + id: id.clone(), + ty: self.go(ty), + }) + .collect(), + ) + .into(); + let id = ID.with(|n| n.borrow().get(t).cloned()); + if let Some(id) = id { + self.env.0.insert(id.to_string(), res); + TypeInner::Var(id.to_string()) + } else { + return res; + } + } + TypeInner::Knot(id) => { + let name = id.to_string(); + let ty = ENV.with(|e| e.borrow().get(id).unwrap().clone()); + self.env.0.insert(id.to_string(), ty); + TypeInner::Var(name) + } + TypeInner::Func(func) => TypeInner::Func(Function { + modes: func.modes.clone(), + args: func.args.iter().map(|arg| self.go(arg)).collect(), + rets: func.rets.iter().map(|arg| self.go(arg)).collect(), + }), + TypeInner::Service(serv) => TypeInner::Service( + serv.iter() + .map(|(id, t)| (id.clone(), self.go(t))) + .collect(), + ), + TypeInner::Class(inits, ref ty) => { + TypeInner::Class(inits.iter().map(|t| self.go(t)).collect(), self.go(ty)) + } + t => t.clone(), + } + .into() + } +} + +#[derive(Debug, 
PartialEq, Hash, Eq, Clone)] +pub struct Type(pub std::rc::Rc); + +#[derive(Debug, PartialEq, Hash, Eq, Clone)] +pub enum TypeInner { + Null, + Bool, + Nat, + Int, + Nat8, + Nat16, + Nat32, + Nat64, + Int8, + Int16, + Int32, + Int64, + Float32, + Float64, + Text, + Reserved, + Empty, + Knot(TypeId), // For recursive types from Rust + Var(String), // For variables from Candid file + Unknown, + Opt(Type), + Vec(Type), + Record(Vec), + Variant(Vec), + Func(Function), + Service(Vec<(String, Type)>), + Class(Vec, Type), + Principal, + Future, +} +impl std::ops::Deref for Type { + type Target = TypeInner; + #[inline(always)] + fn deref(&self) -> &TypeInner { + self.0.deref() + } +} +impl AsRef for Type { + #[inline(always)] + fn as_ref(&self) -> &TypeInner { + self.0.as_ref() + } +} +impl From for Type { + fn from(t: TypeInner) -> Self { + Type(t.into()) + } +} +impl TypeInner { + pub fn is_tuple(&self) -> bool { + match self { + TypeInner::Record(ref fs) => { + for (i, field) in fs.iter().enumerate() { + if field.id.get_id() != (i as u32) { + return false; + } + } + true + } + _ => false, + } + } +} +impl Type { + pub fn is_tuple(&self) -> bool { + self.as_ref().is_tuple() + } + pub fn subst(&self, tau: &std::collections::BTreeMap) -> Self { + use TypeInner::*; + match self.as_ref() { + Var(id) => match tau.get(id) { + None => Var(id.to_string()), + Some(new_id) => Var(new_id.to_string()), + }, + Opt(t) => Opt(t.subst(tau)), + Vec(t) => Vec(t.subst(tau)), + Record(fs) => Record( + fs.iter() + .map(|Field { id, ty }| Field { + id: id.clone(), + ty: ty.subst(tau), + }) + .collect(), + ), + Variant(fs) => Variant( + fs.iter() + .map(|Field { id, ty }| Field { + id: id.clone(), + ty: ty.subst(tau), + }) + .collect(), + ), + Func(func) => { + let func = func.clone(); + Func(Function { + modes: func.modes, + args: func.args.into_iter().map(|t| t.subst(tau)).collect(), + rets: func.rets.into_iter().map(|t| t.subst(tau)).collect(), + }) + } + Service(serv) => Service( + 
serv.iter() + .map(|(meth, ty)| (meth.clone(), ty.subst(tau))) + .collect(), + ), + Class(args, ty) => Class(args.iter().map(|t| t.subst(tau)).collect(), ty.subst(tau)), + _ => return self.clone(), + } + .into() + } +} +impl fmt::Display for Type { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", crate::bindings::candid::pp_ty(self).pretty(80)) + } +} +impl fmt::Display for TypeInner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + crate::bindings::candid::pp_ty_inner(self).pretty(80) + ) + } +} + +#[derive(Debug, Eq, Clone)] +pub enum Label { + Id(u32), + Named(String), + Unnamed(u32), +} + +impl Label { + pub fn get_id(&self) -> u32 { + match *self { + Label::Id(n) => n, + Label::Named(ref n) => idl_hash(n), + Label::Unnamed(n) => n, + } + } +} + +impl std::fmt::Display for Label { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Label::Id(n) | Label::Unnamed(n) => { + write!(f, "{}", super::number::pp_num_str(&n.to_string())) + } + Label::Named(id) => write!(f, "{id}"), + } + } +} + +impl PartialEq for Label { + fn eq(&self, other: &Self) -> bool { + self.get_id() == other.get_id() + } +} + +impl std::hash::Hash for Label { + fn hash(&self, _state: &mut H) { + self.get_id(); + } +} + +pub type SharedLabel = std::rc::Rc