From cc2dd2df787f57412976bad71b4e934c9a2c3fa4 Mon Sep 17 00:00:00 2001 From: Ulysse Carion Date: Mon, 18 Jan 2021 17:56:54 -0800 Subject: [PATCH 1/7] Remove fuzzing --- Cargo.toml | 8 ++---- fuzz/.gitignore | 4 --- fuzz/Cargo.toml | 30 ---------------------- fuzz/fuzz_targets/serde_schema_try_into.rs | 7 ----- fuzz/fuzz_targets/validate.rs | 20 --------------- src/form.rs | 9 ------- src/schema.rs | 17 ------------ src/serde.rs | 27 ------------------- 8 files changed, 2 insertions(+), 120 deletions(-) delete mode 100644 fuzz/.gitignore delete mode 100644 fuzz/Cargo.toml delete mode 100644 fuzz/fuzz_targets/serde_schema_try_into.rs delete mode 100644 fuzz/fuzz_targets/validate.rs diff --git a/Cargo.toml b/Cargo.toml index 199e11e..0b3cedc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,11 +6,7 @@ authors = ["JSON Type Definition Contributors"] edition = "2018" license = "MIT" -[features] -fuzz = ["arbitrary"] - [dependencies] -arbitrary = { version = "0.4.0", features = ["derive"], optional = true } chrono = "0.4" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" +serde = { version = "1", features = ["derive"] } +serde_json = "1" diff --git a/fuzz/.gitignore b/fuzz/.gitignore deleted file mode 100644 index 572e03b..0000000 --- a/fuzz/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ - -target -corpus -artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml deleted file mode 100644 index f96c540..0000000 --- a/fuzz/Cargo.toml +++ /dev/null @@ -1,30 +0,0 @@ - -[package] -name = "jtd-fuzz" -version = "0.0.0" -authors = ["Automatically generated"] -publish = false -edition = "2018" - -[package.metadata] -cargo-fuzz = true - -[dependencies] -libfuzzer-sys = "0.3.1" -serde_json = "1.0" - -[dependencies.jtd] -path = ".." 
-features = ["fuzz"] - -# Prevent this from interfering with workspaces -[workspace] -members = ["."] - -[[bin]] -name = "serde_schema_try_into" -path = "fuzz_targets/serde_schema_try_into.rs" - -[[bin]] -name = "validate" -path = "fuzz_targets/validate.rs" diff --git a/fuzz/fuzz_targets/serde_schema_try_into.rs b/fuzz/fuzz_targets/serde_schema_try_into.rs deleted file mode 100644 index ac3b081..0000000 --- a/fuzz/fuzz_targets/serde_schema_try_into.rs +++ /dev/null @@ -1,7 +0,0 @@ -#![no_main] -use libfuzzer_sys::fuzz_target; - -fuzz_target!(|serde_schema: jtd::serde::Schema| { - use std::convert::TryInto; - let _: Result = serde_schema.try_into(); -}); diff --git a/fuzz/fuzz_targets/validate.rs b/fuzz/fuzz_targets/validate.rs deleted file mode 100644 index adb8417..0000000 --- a/fuzz/fuzz_targets/validate.rs +++ /dev/null @@ -1,20 +0,0 @@ -#![no_main] -use libfuzzer_sys::fuzz_target; - -use serde_json; - -fuzz_target!(|schema_and_instance: (jtd::schema::Schema, Vec)| { - let validator = jtd::validator::Validator { - max_errors: None, - max_depth: None, - }; - - // We're only interested in fuzzing against valid schemas. 
- if schema_and_instance.0.validate().is_err() { - return; - } - - if let Ok(instance) = serde_json::from_slice(&schema_and_instance.1) { - let _ = validator.validate(&schema_and_instance.0, &instance); - } -}); diff --git a/src/form.rs b/src/form.rs index a618c12..bc0cb73 100644 --- a/src/form.rs +++ b/src/form.rs @@ -4,7 +4,6 @@ use std::collections::BTreeSet; use std::str::FromStr; #[derive(Clone, Debug, PartialEq)] -#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))] pub enum Form { Empty, Ref(Ref), @@ -23,21 +22,18 @@ impl Default for Form { } #[derive(Clone, Debug, PartialEq)] -#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))] pub struct Ref { pub nullable: bool, pub definition: String, } #[derive(Clone, Debug, PartialEq)] -#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))] pub struct Type { pub nullable: bool, pub type_value: TypeValue, } #[derive(Clone, Debug, PartialEq)] -#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))] pub enum TypeValue { Boolean, Float32, @@ -74,21 +70,18 @@ impl FromStr for TypeValue { } #[derive(Clone, Debug, PartialEq)] -#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))] pub struct Enum { pub nullable: bool, pub values: BTreeSet, } #[derive(Clone, Debug, PartialEq)] -#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))] pub struct Elements { pub nullable: bool, pub schema: Box, } #[derive(Clone, Debug, PartialEq)] -#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))] pub struct Properties { pub nullable: bool, pub required: BTreeMap, @@ -98,14 +91,12 @@ pub struct Properties { } #[derive(Clone, Debug, PartialEq)] -#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))] pub struct Values { pub nullable: bool, pub schema: Box, } #[derive(Clone, Debug, PartialEq)] -#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))] pub struct Discriminator { pub nullable: bool, pub discriminator: String, diff --git a/src/schema.rs b/src/schema.rs index 9dbe570..ff2723c 100644 --- 
a/src/schema.rs +++ b/src/schema.rs @@ -11,23 +11,6 @@ pub struct Schema { pub metadata: BTreeMap, } -#[cfg(feature = "fuzz")] -impl arbitrary::Arbitrary for Schema { - fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result { - Ok(Schema { - definitions: arbitrary::Arbitrary::arbitrary(u)?, - form: arbitrary::Arbitrary::arbitrary(u)?, - - // serde_json::Value does not derive Arbitrary. That's ok, because - // for the fuzz tests we're doing, we don't really care about - // manipulating arbitrary JSON values. - // - // So we'll always have metadata be None. - metadata: BTreeMap::new(), - }) - } -} - #[derive(Debug, PartialEq)] pub enum SerdeConvertError { InvalidForm, diff --git a/src/serde.rs b/src/serde.rs index 55ceb3a..983f446 100644 --- a/src/serde.rs +++ b/src/serde.rs @@ -47,33 +47,6 @@ pub struct Schema { pub metadata: Option>, } -#[cfg(feature = "fuzz")] -impl arbitrary::Arbitrary for Schema { - fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result { - Ok(Schema { - definitions: arbitrary::Arbitrary::arbitrary(u)?, - nullable: arbitrary::Arbitrary::arbitrary(u)?, - ref_: arbitrary::Arbitrary::arbitrary(u)?, - type_: arbitrary::Arbitrary::arbitrary(u)?, - enum_: arbitrary::Arbitrary::arbitrary(u)?, - elements: arbitrary::Arbitrary::arbitrary(u)?, - properties: arbitrary::Arbitrary::arbitrary(u)?, - optional_properties: arbitrary::Arbitrary::arbitrary(u)?, - additional_properties: arbitrary::Arbitrary::arbitrary(u)?, - values: arbitrary::Arbitrary::arbitrary(u)?, - discriminator: arbitrary::Arbitrary::arbitrary(u)?, - mapping: arbitrary::Arbitrary::arbitrary(u)?, - - // serde_json::Value does not derive Arbitrary. That's ok, because - // for the fuzz tests we're doing, we don't really care about - // manipulating arbitrary JSON values. - // - // So we'll always have metadata be None. 
- metadata: None, - }) - } -} - impl From for Schema { fn from(schema: schema::Schema) -> Schema { use crate::form; From 1a3c762f66ef47d42e77eff996fdd598fa67859a Mon Sep 17 00:00:00 2001 From: Ulysse Carion Date: Tue, 19 Jan 2021 13:08:09 -0800 Subject: [PATCH 2/7] Begin refactor to enum-based schema representation --- Cargo.toml | 1 + src/form.rs | 125 ----- src/lib.rs | 30 +- src/schema.rs | 1110 +++++++++++++------------------------------ src/serde.rs | 433 ----------------- src/serde_schema.rs | 47 ++ src/validate.rs | 424 +++++++++++++++++ src/validator.rs | 436 ----------------- 8 files changed, 830 insertions(+), 1776 deletions(-) delete mode 100644 src/form.rs delete mode 100644 src/serde.rs create mode 100644 src/serde_schema.rs create mode 100644 src/validate.rs delete mode 100644 src/validator.rs diff --git a/Cargo.toml b/Cargo.toml index 0b3cedc..4c99012 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,4 @@ license = "MIT" chrono = "0.4" serde = { version = "1", features = ["derive"] } serde_json = "1" +thiserror = "1" diff --git a/src/form.rs b/src/form.rs deleted file mode 100644 index bc0cb73..0000000 --- a/src/form.rs +++ /dev/null @@ -1,125 +0,0 @@ -use crate::schema::Schema; -use std::collections::BTreeMap; -use std::collections::BTreeSet; -use std::str::FromStr; - -#[derive(Clone, Debug, PartialEq)] -pub enum Form { - Empty, - Ref(Ref), - Type(Type), - Enum(Enum), - Elements(Elements), - Properties(Properties), - Values(Values), - Discriminator(Discriminator), -} - -impl Default for Form { - fn default() -> Self { - Form::Empty - } -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Ref { - pub nullable: bool, - pub definition: String, -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Type { - pub nullable: bool, - pub type_value: TypeValue, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum TypeValue { - Boolean, - Float32, - Float64, - Int8, - Uint8, - Int16, - Uint16, - Int32, - Uint32, - String, - Timestamp, -} - -impl FromStr for 
TypeValue { - type Err = (); - - fn from_str(s: &str) -> Result { - match s { - "boolean" => Ok(Self::Boolean), - "float32" => Ok(Self::Float32), - "float64" => Ok(Self::Float64), - "int8" => Ok(Self::Int8), - "uint8" => Ok(Self::Uint8), - "int16" => Ok(Self::Int16), - "uint16" => Ok(Self::Uint16), - "int32" => Ok(Self::Int32), - "uint32" => Ok(Self::Uint32), - "string" => Ok(Self::String), - "timestamp" => Ok(Self::Timestamp), - _ => Err(()), - } - } -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Enum { - pub nullable: bool, - pub values: BTreeSet, -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Elements { - pub nullable: bool, - pub schema: Box, -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Properties { - pub nullable: bool, - pub required: BTreeMap, - pub optional: BTreeMap, - pub additional: bool, - pub has_required: bool, -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Values { - pub nullable: bool, - pub schema: Box, -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Discriminator { - pub nullable: bool, - pub discriminator: String, - pub mapping: BTreeMap, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn type_value_from_str() { - assert_eq!(Err(()), "Boolean".parse::()); - assert_eq!(Ok(TypeValue::Boolean), "boolean".parse()); - assert_eq!(Ok(TypeValue::Float32), "float32".parse()); - assert_eq!(Ok(TypeValue::Float64), "float64".parse()); - assert_eq!(Ok(TypeValue::Int8), "int8".parse()); - assert_eq!(Ok(TypeValue::Uint8), "uint8".parse()); - assert_eq!(Ok(TypeValue::Int16), "int16".parse()); - assert_eq!(Ok(TypeValue::Uint16), "uint16".parse()); - assert_eq!(Ok(TypeValue::Int32), "int32".parse()); - assert_eq!(Ok(TypeValue::Uint32), "uint32".parse()); - assert_eq!(Ok(TypeValue::String), "string".parse()); - assert_eq!(Ok(TypeValue::Timestamp), "timestamp".parse()); - } -} diff --git a/src/lib.rs b/src/lib.rs index 0d768fd..2d12f51 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,17 +1,17 @@ -pub mod form; -pub mod 
schema; -pub mod serde; -pub mod validator; +// pub mod form; +// pub mod schema; +// pub mod serde; +// pub mod validator; -pub use crate::serde::Schema as SerdeSchema; -pub use form::Form; -pub use schema::Schema; -pub use validator::{ValidationError, Validator}; +// pub use crate::serde::Schema as SerdeSchema; +// pub use form::Form; +// pub use schema::Schema; +// pub use validator::{ValidationError, Validator}; -#[cfg(test)] -mod tests { - #[test] - fn it_works() { - assert_eq!(2 + 2, 4); - } -} +mod schema; +mod serde_schema; +mod validate; + +pub use schema::*; +pub use serde_schema::*; +pub use validate::*; diff --git a/src/schema.rs b/src/schema.rs index ff2723c..cfd4964 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -1,31 +1,114 @@ -use crate::form; -use crate::serde; +use crate::SerdeSchema; use serde_json::Value; use std::collections::{BTreeMap, BTreeSet}; -use std::convert::{TryFrom, TryInto}; +use thiserror::Error; + +pub type Definitions = BTreeMap; +pub type Metadata = BTreeMap; + +#[derive(Clone, Debug, PartialEq)] +pub enum Schema { + Empty { + definitions: Definitions, + metadata: Metadata, + }, + Ref { + definitions: Definitions, + metadata: Metadata, + nullable: bool, + ref_: String, + }, + Type { + definitions: Definitions, + metadata: Metadata, + nullable: bool, + type_: Type, + }, + Enum { + definitions: Definitions, + metadata: Metadata, + nullable: bool, + enum_: BTreeSet, + }, + Elements { + definitions: Definitions, + metadata: Metadata, + nullable: bool, + elements: Box, + }, + Properties { + definitions: Definitions, + metadata: Metadata, + nullable: bool, + properties: BTreeMap, + optional_properties: BTreeMap, + properties_is_present: bool, + additional_properties: bool, + }, + Values { + definitions: Definitions, + metadata: Metadata, + nullable: bool, + values: Box, + }, + Discriminator { + definitions: Definitions, + metadata: Metadata, + nullable: bool, + discriminator: String, + mapping: BTreeMap, + }, +} -#[derive(Clone, 
Debug, Default, PartialEq)] -pub struct Schema { - pub definitions: BTreeMap, - pub form: form::Form, - pub metadata: BTreeMap, +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Type { + Boolean, + Int8, + Uint8, + Int16, + Uint16, + Int32, + Uint32, + Float32, + Float64, + String, + Timestamp, } -#[derive(Debug, PartialEq)] -pub enum SerdeConvertError { +#[derive(Clone, Debug, PartialEq, Eq, Error)] +pub enum FromSerdeSchemaError { + #[error("invalid combination of keywords in schema")] InvalidForm, + + #[error("invalid type: {0:?}")] InvalidType(String), + + #[error("duplicated enum value: {0:?}")] DuplicatedEnumValue(String), } -#[derive(Debug, PartialEq)] -pub enum ValidateError { +#[derive(Clone, Debug, PartialEq, Eq, Error)] +pub enum SchemaValidateError { + #[error("no such definition: {0:?}")] NoSuchDefinition(String), + + #[error("non-root definitions")] NonRootDefinitions, + + #[error("empty enum")] EmptyEnum, + + #[error("property repeated in optionalProperties: {0:?}")] RepeatedProperty(String), - MappingNullable, - MappingNotPropertiesForm, + + #[error("nullable schema in mapping")] + NullableMapping, + + #[error("non-properties schema in mapping")] + NonPropertiesMapping, + + #[error("discriminator redefined in mapping: {0:?}")] + RepeatedDiscriminator(String), } // Index of valid form "signatures" -- i.e., combinations of the presence of the @@ -88,843 +171,336 @@ const VALID_FORM_SIGNATURES: [[bool; 10]; 13] = [ ]; impl Schema { - pub fn validate(&self) -> Result<(), ValidateError> { - self.validate_with_root(None) - } - - fn validate_with_root(&self, root: Option<&Self>) -> Result<(), ValidateError> { - // If root is non-None, then self is not the root schema. We should - // therefore not tolerate definitions being placed on this schema. - if root.is_some() && !self.definitions.is_empty() { - return Err(ValidateError::NonRootDefinitions); - } - - // This root variable is the one we will use for recursive calls to - // validate_with_root. 
- // - // If we are at the top-level call of validate_with_root (invoked by the - // public validate method) wherein root is None, then root will be - // Some(self) for the recursive calls, since we ourselves are the root. - let root = root.or(Some(self)); - - // Validate each definition, if any. - for sub_schema in self.definitions.values() { - sub_schema.validate_with_root(root)?; + pub fn from_serde_schema(serde_schema: SerdeSchema) -> Result { + let mut definitions = BTreeMap::new(); + for (name, sub_schema) in serde_schema.definitions.unwrap_or_default() { + definitions.insert(name, Self::from_serde_schema(sub_schema)?); } - match &self.form { - form::Form::Empty | form::Form::Type(_) => {} - form::Form::Enum(form::Enum { values, .. }) => { - if values.is_empty() { - return Err(ValidateError::EmptyEnum); - } - } - form::Form::Ref(form::Ref { definition, .. }) => { - // This unwrap is safe because the assignment to root above - // guarantees root will be non-None. - if !root.unwrap().definitions.contains_key(definition) { - return Err(ValidateError::NoSuchDefinition(definition.clone())); - } - } - form::Form::Elements(form::Elements { schema, .. }) => { - schema.validate_with_root(root)?; - } - form::Form::Properties(form::Properties { - required, optional, .. - }) => { - for schema in required.values() { - schema.validate_with_root(root)?; - } - - for (name, schema) in optional { - if required.contains_key(name) { - return Err(ValidateError::RepeatedProperty(name.clone())); - } - - schema.validate_with_root(root)?; - } - } - form::Form::Values(form::Values { schema, .. }) => { - schema.validate_with_root(root)?; - } - form::Form::Discriminator(form::Discriminator { - discriminator, - mapping, - .. - }) => { - for schema in mapping.values() { - schema.validate_with_root(root)?; - - match &schema.form { - form::Form::Properties(form::Properties { - required, - optional, - nullable, - .. 
- }) => { - if *nullable { - return Err(ValidateError::MappingNullable); - } - - if required.contains_key(discriminator) - || optional.contains_key(discriminator) - { - return Err(ValidateError::RepeatedProperty(discriminator.clone())); - } - } - _ => { - return Err(ValidateError::MappingNotPropertiesForm); - } - } - } - } - }; - - Ok(()) - } -} - -impl TryFrom for Schema { - type Error = SerdeConvertError; + let metadata = serde_schema.metadata.unwrap_or_default(); + let nullable = serde_schema.nullable.unwrap_or(false); - fn try_from(schema: serde::Schema) -> Result { + // Ensure the schema is using a valid combination of keywords. let form_signature = [ - schema.ref_.is_some(), - schema.type_.is_some(), - schema.enum_.is_some(), - schema.elements.is_some(), - schema.properties.is_some(), - schema.optional_properties.is_some(), - schema.additional_properties.is_some(), - schema.values.is_some(), - schema.discriminator.is_some(), - schema.mapping.is_some(), + serde_schema.ref_.is_some(), + serde_schema.type_.is_some(), + serde_schema.enum_.is_some(), + serde_schema.elements.is_some(), + serde_schema.properties.is_some(), + serde_schema.optional_properties.is_some(), + serde_schema.additional_properties.is_some(), + serde_schema.values.is_some(), + serde_schema.discriminator.is_some(), + serde_schema.mapping.is_some(), ]; if !VALID_FORM_SIGNATURES.contains(&form_signature) { - return Err(SerdeConvertError::InvalidForm); + return Err(FromSerdeSchemaError::InvalidForm); } - let mut definitions = BTreeMap::new(); - for (name, sub_schema) in schema.definitions.unwrap_or_default() { - definitions.insert(name, sub_schema.try_into()?); - } - - if let Some(ref_) = schema.ref_ { - return Ok(Schema { + // From here on out, we can use the presence of certain keywords to + // determine the form the schema takes on. + // + // We'll handle the empty form as a fallback, and handle the other forms + // in standard order. 
+ if let Some(ref_) = serde_schema.ref_ { + return Ok(Schema::Ref { definitions, - form: form::Form::Ref(form::Ref { - nullable: schema.nullable.unwrap_or_default(), - definition: ref_, - }), - metadata: schema.metadata.unwrap_or_default(), + metadata, + nullable, + ref_, }); } - if let Some(type_) = schema.type_ { - return Ok(Schema { + if let Some(type_) = serde_schema.type_ { + let type_ = match &type_[..] { + "boolean" => Type::Boolean, + "int8" => Type::Int8, + "uint8" => Type::Uint8, + "int16" => Type::Int16, + "uint16" => Type::Uint16, + "int32" => Type::Int32, + "uint32" => Type::Uint32, + "float32" => Type::Float32, + "float64" => Type::Float64, + "string" => Type::String, + "timestamp" => Type::Timestamp, + _ => return Err(FromSerdeSchemaError::InvalidType(type_)), + }; + + return Ok(Schema::Type { definitions, - form: form::Form::Type(form::Type { - nullable: schema.nullable.unwrap_or_default(), - type_value: type_ - .parse() - .map_err(|_| SerdeConvertError::InvalidType(type_))?, - }), - metadata: schema.metadata.unwrap_or_default(), + metadata, + nullable, + type_, }); } - if let Some(enum_) = schema.enum_ { + if let Some(enum_) = serde_schema.enum_ { + // We do this construction by hand, rather than using collect, to + // detect the case of an enum value being repeated. This can't be + // detected once the values are put in the set. 
let mut values = BTreeSet::new(); - for val in enum_ { - if values.contains(&val) { - return Err(SerdeConvertError::DuplicatedEnumValue(val)); + for value in enum_ { + if values.contains(&value) { + return Err(FromSerdeSchemaError::DuplicatedEnumValue(value)); } - values.insert(val); + values.insert(value); } - return Ok(Schema { + return Ok(Schema::Enum { definitions, - form: form::Form::Enum(form::Enum { - nullable: schema.nullable.unwrap_or_default(), - values, - }), - metadata: schema.metadata.unwrap_or_default(), + metadata, + nullable, + enum_: values, }); } - if let Some(elements) = schema.elements { - return Ok(Schema { + if let Some(elements) = serde_schema.elements { + return Ok(Schema::Elements { definitions, - form: form::Form::Elements(form::Elements { - nullable: schema.nullable.unwrap_or_default(), - schema: Box::new((*elements).try_into()?), - }), - metadata: schema.metadata.unwrap_or_default(), + metadata, + nullable, + elements: Box::new(Self::from_serde_schema(*elements)?), }); } - if schema.properties.is_some() || schema.optional_properties.is_some() { - let has_required = schema.properties.is_some(); + if serde_schema.properties.is_some() || serde_schema.optional_properties.is_some() { + let properties_is_present = serde_schema.properties.is_some(); + let additional_properties = serde_schema.additional_properties.unwrap_or(false); - let mut required = BTreeMap::new(); - for (name, sub_schema) in schema.properties.unwrap_or_default() { - required.insert(name, sub_schema.try_into()?); + let mut properties = BTreeMap::new(); + for (name, sub_schema) in serde_schema.properties.unwrap_or_default() { + properties.insert(name, Self::from_serde_schema(sub_schema)?); } - let mut optional = BTreeMap::new(); - for (name, sub_schema) in schema.optional_properties.unwrap_or_default() { - optional.insert(name, sub_schema.try_into()?); + let mut optional_properties = BTreeMap::new(); + for (name, sub_schema) in 
serde_schema.optional_properties.unwrap_or_default() { + optional_properties.insert(name, Self::from_serde_schema(sub_schema)?); } - return Ok(Schema { + return Ok(Schema::Properties { definitions, - form: form::Form::Properties(form::Properties { - nullable: schema.nullable.unwrap_or_default(), - required, - optional, - additional: schema.additional_properties.unwrap_or_default(), - has_required, - }), - metadata: schema.metadata.unwrap_or_default(), + metadata, + nullable, + properties, + optional_properties, + properties_is_present, + additional_properties, }); } - if let Some(values) = schema.values { - return Ok(Schema { + if let Some(values) = serde_schema.values { + return Ok(Schema::Values { definitions, - form: form::Form::Values(form::Values { - nullable: schema.nullable.unwrap_or_default(), - schema: Box::new((*values).try_into()?), - }), - metadata: schema.metadata.unwrap_or_default(), + metadata, + nullable, + values: Box::new(Self::from_serde_schema(*values)?), }); } - if let Some(discriminator) = schema.discriminator { + if let Some(discriminator) = serde_schema.discriminator { + // This is safe because the form signature check ensures mapping is + // present if discriminator is present. 
let mut mapping = BTreeMap::new(); - for (name, sub_schema) in schema.mapping.unwrap() { - mapping.insert(name, sub_schema.try_into()?); + for (name, sub_schema) in serde_schema.mapping.unwrap() { + mapping.insert(name, Self::from_serde_schema(sub_schema)?); } - return Ok(Schema { + return Ok(Schema::Discriminator { definitions, - form: form::Form::Discriminator(form::Discriminator { - nullable: schema.nullable.unwrap_or_default(), - discriminator, - mapping, - }), - metadata: schema.metadata.unwrap_or_default(), + metadata, + nullable, + discriminator, + mapping, }); } - Ok(Schema { + Ok(Schema::Empty { definitions, - form: form::Form::Empty, - metadata: schema.metadata.unwrap_or_default(), + metadata, }) } -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - use std::convert::TryInto; - - #[test] - fn from_empty() { - assert_eq!( - Ok(Schema { - form: form::Form::Empty, - ..Default::default() - }), - serde_json::from_value::(json!({})) - .unwrap() - .try_into(), - ) - } - - #[test] - fn from_empty_with_metadata() { - assert_eq!( - Ok(Schema { - form: form::Form::Empty, - metadata: vec![("foo".to_owned(), json!("bar"))].into_iter().collect(), - ..Default::default() - }), - serde_json::from_value::(json!({ - "metadata": { - "foo": "bar" - } - })) - .unwrap() - .try_into(), - ) - } - - #[test] - fn from_empty_with_definitions() { - assert_eq!( - Ok(Schema { - form: form::Form::Empty, - definitions: vec![("foo".to_owned(), Default::default())] - .into_iter() - .collect(), - ..Default::default() - }), - serde_json::from_value::(json!({ - "definitions": { - "foo": {} - } - })) - .unwrap() - .try_into(), - ) - } - - #[test] - fn from_ref() { - assert_eq!( - Ok(Schema { - form: form::Form::Ref(form::Ref { - nullable: false, - definition: "foo".to_owned(), - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "ref": "foo", - })) - .unwrap() - .try_into(), - ) - } - - #[test] - fn from_ref_with_nullable() { - assert_eq!( - Ok(Schema { 
- form: form::Form::Ref(form::Ref { - nullable: true, - definition: "foo".to_owned(), - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "ref": "foo", - "nullable": true, - })) - .unwrap() - .try_into(), - ) - } - - #[test] - fn from_type() { - assert_eq!( - Ok(Schema { - form: form::Form::Type(form::Type { - nullable: false, - type_value: form::TypeValue::Boolean - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "type": "boolean", - })) - .unwrap() - .try_into(), - ) - } - #[test] - fn from_type_with_nullable() { - assert_eq!( - Ok(Schema { - form: form::Form::Type(form::Type { - nullable: true, - type_value: form::TypeValue::Boolean - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "type": "boolean", - "nullable": true, - })) - .unwrap() - .try_into(), - ) - } - - #[test] - fn from_type_with_invalid_value() { - let result: Result = - serde_json::from_value::(json!({ - "type": "foo", - })) - .unwrap() - .try_into(); - - assert_eq!( - Err(SerdeConvertError::InvalidType("foo".to_owned())), - result - ) - } - - #[test] - fn from_enum() { - assert_eq!( - Ok(Schema { - form: form::Form::Enum(form::Enum { - nullable: false, - values: vec!["foo".to_owned(), "bar".to_owned()] - .into_iter() - .collect(), - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "enum": ["foo", "bar"], - })) - .unwrap() - .try_into(), - ) + pub fn validate(&self) -> Result<(), SchemaValidateError> { + self._validate(None) } - #[test] - fn from_enum_with_nullable() { - assert_eq!( - Ok(Schema { - form: form::Form::Enum(form::Enum { - nullable: true, - values: vec!["foo".to_owned(), "bar".to_owned()] - .into_iter() - .collect(), - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "enum": ["foo", "bar"], - "nullable": true, - })) - .unwrap() - .try_into(), - ) - } + fn _validate(&self, root: Option<&Self>) -> Result<(), SchemaValidateError> { + let sub_root = root.or(Some(self)); - #[test] - fn 
from_enum_with_repeated_value() { - let result: Result = - serde_json::from_value::(json!({ - "enum": ["foo", "bar", "foo"], - })) - .unwrap() - .try_into(); - - assert_eq!( - Err(SerdeConvertError::DuplicatedEnumValue("foo".to_owned())), - result - ) - } - - #[test] - fn from_elements() { - assert_eq!( - Ok(Schema { - form: form::Form::Elements(form::Elements { - nullable: false, - schema: Default::default(), - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "elements": {}, - })) - .unwrap() - .try_into(), - ) - } - - #[test] - fn from_elements_with_nullable() { - assert_eq!( - Ok(Schema { - form: form::Form::Elements(form::Elements { - nullable: true, - schema: Default::default(), - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "elements": {}, - "nullable": true, - })) - .unwrap() - .try_into(), - ) - } - - #[test] - fn from_properties() { - assert_eq!( - Ok(Schema { - form: form::Form::Properties(form::Properties { - nullable: false, - required: vec![("foo".to_owned(), Default::default())] - .into_iter() - .collect(), - optional: vec![("bar".to_owned(), Default::default())] - .into_iter() - .collect(), - additional: false, - has_required: true, - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "properties": { - "foo": {}, - }, - "optionalProperties": { - "bar": {}, - }, - })) - .unwrap() - .try_into(), - ) - } - - #[test] - fn from_properties_without_optional() { - assert_eq!( - Ok(Schema { - form: form::Form::Properties(form::Properties { - nullable: false, - required: vec![("foo".to_owned(), Default::default())] - .into_iter() - .collect(), - optional: BTreeMap::new(), - additional: false, - has_required: true, - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "properties": { - "foo": {}, - }, - })) - .unwrap() - .try_into(), - ) - } + if root.is_some() && !self.definitions().is_empty() { + return Err(SchemaValidateError::NonRootDefinitions); + } - #[test] - fn 
from_properties_without_required() { - assert_eq!( - Ok(Schema { - form: form::Form::Properties(form::Properties { - nullable: false, - required: BTreeMap::new(), - optional: vec![("foo".to_owned(), Default::default())] - .into_iter() - .collect(), - additional: false, - has_required: false, - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "optionalProperties": { - "foo": {}, - }, - })) - .unwrap() - .try_into(), - ) - } + for sub_schema in self.definitions().values() { + sub_schema._validate(sub_root)?; + } - #[test] - fn from_properties_with_additional() { - assert_eq!( - Ok(Schema { - form: form::Form::Properties(form::Properties { - nullable: false, - required: vec![("foo".to_owned(), Default::default())] - .into_iter() - .collect(), - optional: vec![("bar".to_owned(), Default::default())] - .into_iter() - .collect(), - additional: true, - has_required: true, - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "properties": { - "foo": {}, - }, - "optionalProperties": { - "bar": {}, - }, - "additionalProperties": true, - })) - .unwrap() - .try_into(), - ) - } + match self { + Self::Empty { .. } => {} + Self::Ref { ref_, .. } => { + if !sub_root + .map(|r| r.definitions()) + .unwrap() + .contains_key(ref_) + { + return Err(SchemaValidateError::NoSuchDefinition(ref_.clone())); + } + } + Self::Type { .. } => {} + Self::Enum { enum_, .. } => { + if enum_.is_empty() { + return Err(SchemaValidateError::EmptyEnum); + } + } + Self::Elements { elements, .. } => { + elements._validate(sub_root)?; + } + Self::Properties { + properties, + optional_properties, + .. 
+ } => { + for key in properties.keys() { + if optional_properties.contains_key(key) { + return Err(SchemaValidateError::RepeatedProperty(key.clone())); + } + } - #[test] - fn from_properties_with_nullable() { - assert_eq!( - Ok(Schema { - form: form::Form::Properties(form::Properties { - nullable: true, - required: vec![("foo".to_owned(), Default::default())] - .into_iter() - .collect(), - optional: vec![("bar".to_owned(), Default::default())] - .into_iter() - .collect(), - additional: false, - has_required: true, - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "properties": { - "foo": {}, - }, - "optionalProperties": { - "bar": {}, - }, - "nullable": true, - })) - .unwrap() - .try_into(), - ) - } + for sub_schema in properties.values() { + sub_schema._validate(sub_root)?; + } - #[test] - fn from_values() { - assert_eq!( - Ok(Schema { - form: form::Form::Values(form::Values { - nullable: false, - schema: Default::default(), - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "values": {}, - })) - .unwrap() - .try_into(), - ) - } + for sub_schema in optional_properties.values() { + sub_schema._validate(sub_root)?; + } + } + Self::Values { values, .. } => { + values._validate(sub_root)?; + } + Self::Discriminator { + discriminator, + mapping, + .. + } => { + for sub_schema in mapping.values() { + if let Self::Properties { + nullable, + properties, + optional_properties, + .. 
+ } = sub_schema + { + if *nullable { + return Err(SchemaValidateError::NullableMapping); + } - #[test] - fn from_values_with_nullable() { - assert_eq!( - Ok(Schema { - form: form::Form::Values(form::Values { - nullable: true, - schema: Default::default(), - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "values": {}, - "nullable": true, - })) - .unwrap() - .try_into(), - ) - } + if properties.contains_key(discriminator) + || optional_properties.contains_key(discriminator) + { + return Err(SchemaValidateError::RepeatedDiscriminator( + discriminator.clone(), + )); + } + } else { + return Err(SchemaValidateError::NonPropertiesMapping); + } - #[test] - fn from_discriminator() { - assert_eq!( - Ok(Schema { - form: form::Form::Discriminator(form::Discriminator { - nullable: false, - discriminator: "foo".to_owned(), - mapping: vec![("bar".to_owned(), Default::default())] - .into_iter() - .collect(), - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "discriminator": "foo", - "mapping": { - "bar": {} + sub_schema._validate(sub_root)?; } - })) - .unwrap() - .try_into(), - ) - } + } + } - #[test] - fn from_discriminator_with_nullable() { - assert_eq!( - Ok(Schema { - form: form::Form::Discriminator(form::Discriminator { - nullable: true, - discriminator: "foo".to_owned(), - mapping: vec![("bar".to_owned(), Default::default())] - .into_iter() - .collect(), - }), - ..Default::default() - }), - serde_json::from_value::(json!({ - "discriminator": "foo", - "mapping": { - "bar": {} - }, - "nullable": true, - })) - .unwrap() - .try_into(), - ) + Ok(()) } - #[test] - fn from_invalid_forms() { - let invalid_forms = vec![ - json!({"ref": "foo", "type": "uint32"}), - json!({"type": "uint32", "enum": ["foo"]}), - json!({"enum": ["foo"], "elements": {}}), - json!({"elements": {}, "properties": {}}), - json!({"elements": {}, "optionalProperties": {}}), - json!({"elements": {}, "additionalProperties": true}), - json!({"properties": {}, "values": 
{}}), - json!({"values": {}, "discriminator": "foo"}), - json!({"discriminator": "foo"}), - json!({"mapping": {}}), - ]; - - for invalid_form in invalid_forms { - let result: Result = - serde_json::from_value::(invalid_form) - .unwrap() - .try_into(); - assert_eq!(Err(SerdeConvertError::InvalidForm), result); + pub fn definitions(&self) -> &BTreeMap { + match self { + Self::Empty { definitions, .. } => definitions, + Self::Ref { definitions, .. } => definitions, + Self::Enum { definitions, .. } => definitions, + Self::Type { definitions, .. } => definitions, + Self::Elements { definitions, .. } => definitions, + Self::Properties { definitions, .. } => definitions, + Self::Values { definitions, .. } => definitions, + Self::Discriminator { definitions, .. } => definitions, } } - #[test] - fn from_empty_with_definitions_containing_definitions() { - let schema: Schema = serde_json::from_value::(json!({ - "definitions": { - "foo": { - "definitions": {"foo": {}} - } - }, - })) - .unwrap() - .try_into() - .unwrap(); - - assert_eq!(Err(ValidateError::NonRootDefinitions), schema.validate()); + pub fn metadata(&self) -> &BTreeMap { + match self { + Self::Empty { metadata, .. } => metadata, + Self::Ref { metadata, .. } => metadata, + Self::Enum { metadata, .. } => metadata, + Self::Type { metadata, .. } => metadata, + Self::Elements { metadata, .. } => metadata, + Self::Properties { metadata, .. } => metadata, + Self::Values { metadata, .. } => metadata, + Self::Discriminator { metadata, .. } => metadata, + } } - #[test] - fn from_enum_with_empty_array() { - let schema: Schema = serde_json::from_value::(json!({ - "enum": [] - })) - .unwrap() - .try_into() - .unwrap(); - - assert_eq!(Err(ValidateError::EmptyEnum), schema.validate()); + pub fn nullable(&self) -> bool { + match self { + Self::Empty { .. } => true, + Self::Ref { nullable, .. } => *nullable, + Self::Enum { nullable, .. } => *nullable, + Self::Type { nullable, .. } => *nullable, + Self::Elements { nullable, .. 
} => *nullable, + Self::Properties { nullable, .. } => *nullable, + Self::Values { nullable, .. } => *nullable, + Self::Discriminator { nullable, .. } => *nullable, + } } +} - #[test] - fn from_properties_with_repeated_keys() { - let schema: Schema = serde_json::from_value::(json!({ - "properties": { - "foo": {}, - }, - "optionalProperties": { - "foo": {}, - }, - "nullable": true, - })) - .unwrap() - .try_into() - .unwrap(); - - assert_eq!( - Err(ValidateError::RepeatedProperty("foo".to_owned())), - schema.validate() - ); - } +#[cfg(test)] +mod tests { + use crate::{Schema, SerdeSchema}; #[test] - fn from_discriminator_with_non_properties_mapping() { - let schema: Schema = serde_json::from_value::(json!({ - "discriminator": "foo", - "mapping": { - "foo": { - "values": {} - } - } - })) - .unwrap() - .try_into() - .unwrap(); - - assert_eq!( - Err(ValidateError::MappingNotPropertiesForm), - schema.validate() - ); - } + fn invalid_schemas() { + use std::collections::BTreeMap; - #[test] - fn from_discriminator_with_mapping_redefining_discriminator() { - let schema: Schema = serde_json::from_value::(json!({ - "discriminator": "foo", - "mapping": { - "foo": { - "properties": { "foo": {}} + let test_cases: BTreeMap = serde_json::from_str(include_str!( + "../json-typedef-spec/tests/invalid_schemas.json" + )) + .expect("parse invalid_schemas.json"); + + for (test_case_name, test_case) in test_cases { + if let Ok(serde_schema) = serde_json::from_value::(test_case) { + if let Ok(schema) = Schema::from_serde_schema(serde_schema) { + if schema.validate().is_ok() { + panic!( + "failed to detect invalid schema: {}, got: {:?}", + test_case_name, schema + ); + } } } - })) - .unwrap() - .try_into() - .unwrap(); - - assert_eq!( - Err(ValidateError::RepeatedProperty("foo".to_owned())), - schema.validate() - ); + } } #[test] - fn spec_invalid_schemas_suite() { - let test_cases: BTreeMap = serde_json::from_str(include_str!( - "../json-typedef-spec/tests/invalid_schemas.json" - )) - 
.unwrap(); + fn valid_schemas() { + use std::collections::BTreeMap; - for (name, invalid_schema) in test_cases { - dbg!(&invalid_schema); - if let Ok(schema) = serde_json::from_value::(invalid_schema) { - dbg!(&schema); - let result: Result = schema.try_into(); + #[derive(serde::Deserialize)] + struct TestCase { + schema: serde_json::Value, + } - if let Ok(schema) = result { - dbg!(&name, &schema); - assert!(schema.validate().is_err(), name); - } - } + let test_cases: BTreeMap = + serde_json::from_str(include_str!("../json-typedef-spec/tests/validation.json")) + .expect("parse validation.json"); + + for (test_case_name, test_case) in test_cases { + let serde_schema = + serde_json::from_value::(test_case.schema).expect(&test_case_name); + let schema = Schema::from_serde_schema(serde_schema).expect(&test_case_name); + schema.validate().expect(&test_case_name); } } } diff --git a/src/serde.rs b/src/serde.rs deleted file mode 100644 index 983f446..0000000 --- a/src/serde.rs +++ /dev/null @@ -1,433 +0,0 @@ -use crate::schema; -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use std::collections::BTreeMap; - -#[derive(Clone, Serialize, Deserialize, Debug, Default, PartialEq)] -#[serde(rename_all = "camelCase")] -#[serde(deny_unknown_fields)] -pub struct Schema { - #[serde(skip_serializing_if = "Option::is_none")] - pub definitions: Option>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub nullable: Option, - - #[serde(skip_serializing_if = "Option::is_none")] - pub ref_: Option, - - #[serde(skip_serializing_if = "Option::is_none")] - pub type_: Option, - - #[serde(skip_serializing_if = "Option::is_none")] - pub enum_: Option>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub elements: Option>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub properties: Option>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub optional_properties: Option>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub 
additional_properties: Option, - - #[serde(skip_serializing_if = "Option::is_none")] - pub values: Option>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub discriminator: Option, - - #[serde(skip_serializing_if = "Option::is_none")] - pub mapping: Option>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub metadata: Option>, -} - -impl From for Schema { - fn from(schema: schema::Schema) -> Schema { - use crate::form; - - let mut out = Schema::default(); - - if !schema.definitions.is_empty() { - out.definitions = Some( - schema - .definitions - .into_iter() - .map(|(k, v)| (k, v.into())) - .collect(), - ); - } - - match schema.form { - form::Form::Empty => {} - form::Form::Ref(form::Ref { - nullable, - definition, - }) => { - if nullable { - out.nullable = Some(true); - } - - out.ref_ = Some(definition); - } - form::Form::Type(form::Type { - nullable, - type_value, - }) => { - if nullable { - out.nullable = Some(true); - } - - out.type_ = Some( - match type_value { - form::TypeValue::Boolean => "boolean", - form::TypeValue::Float32 => "float32", - form::TypeValue::Float64 => "float64", - form::TypeValue::Int8 => "int8", - form::TypeValue::Uint8 => "uint8", - form::TypeValue::Int16 => "int16", - form::TypeValue::Uint16 => "uint16", - form::TypeValue::Int32 => "int32", - form::TypeValue::Uint32 => "uint32", - form::TypeValue::String => "string", - form::TypeValue::Timestamp => "timestamp", - } - .to_owned(), - ) - } - form::Form::Enum(form::Enum { nullable, values }) => { - if nullable { - out.nullable = Some(true); - } - - out.enum_ = Some(values.into_iter().collect()); - } - form::Form::Elements(form::Elements { nullable, schema }) => { - if nullable { - out.nullable = Some(true); - } - - out.elements = Some(Box::new((*schema).into())); - } - form::Form::Properties(form::Properties { - nullable, - required, - optional, - additional, - has_required, - }) => { - if nullable { - out.nullable = Some(true); - } - - if has_required { - out.properties 
= - Some(required.into_iter().map(|(k, v)| (k, v.into())).collect()); - } - - if !optional.is_empty() { - out.optional_properties = - Some(optional.into_iter().map(|(k, v)| (k, v.into())).collect()); - } - - if additional { - out.additional_properties = Some(true); - } - } - form::Form::Values(form::Values { nullable, schema }) => { - if nullable { - out.nullable = Some(true); - } - - out.values = Some(Box::new((*schema).into())); - } - form::Form::Discriminator(form::Discriminator { - nullable, - discriminator, - mapping, - }) => { - if nullable { - out.nullable = Some(true); - } - - out.discriminator = Some(discriminator); - out.mapping = Some(mapping.into_iter().map(|(k, v)| (k, v.into())).collect()); - } - } - - if !schema.metadata.is_empty() { - out.metadata = Some(schema.metadata); - } - - out - } -} - -#[cfg(test)] -mod tests { - use serde_json::json; - - #[test] - fn serialize_partial() { - // Fields are None by default. These shouldn't be serialized. - assert_eq!( - "{\"ref\":\"foo\"}", - serde_json::to_string(&super::Schema { - ref_: Some("foo".to_owned()), - ..Default::default() - }) - .unwrap() - ); - } - - #[test] - fn parse_empty() { - assert_eq!( - super::Schema::default(), - serde_json::from_value(json!({})).unwrap() - ); - } - - #[test] - fn parse_partial() { - assert_eq!( - super::Schema { - nullable: Some(true), - optional_properties: Some( - vec![( - "foo".to_owned(), - super::Schema { - type_: Some("uint32".to_owned()), - ..Default::default() - } - )] - .into_iter() - .collect() - ), - ..Default::default() - }, - serde_json::from_value(json!({ - "optionalProperties": { - "foo": { - "type": "uint32", - }, - }, - "nullable": true, - })) - .unwrap() - ); - } - - #[test] - fn parse_full() { - assert_eq!( - super::Schema { - definitions: Some( - vec![( - "foo".to_owned(), - super::Schema { - type_: Some("uint32".to_owned()), - ..Default::default() - } - )] - .into_iter() - .collect() - ), - nullable: Some(true), - ref_: Some("foo".to_owned()), - 
type_: Some("uint32".to_owned()), - enum_: Some(vec!["foo".to_owned(), "bar".to_owned()]), - elements: Some(Box::new(super::Schema { - type_: Some("uint32".to_owned()), - ..Default::default() - })), - properties: Some( - vec![( - "foo".to_owned(), - super::Schema { - type_: Some("uint32".to_owned()), - ..Default::default() - } - )] - .into_iter() - .collect() - ), - optional_properties: Some( - vec![( - "foo".to_owned(), - super::Schema { - type_: Some("uint32".to_owned()), - ..Default::default() - } - )] - .into_iter() - .collect() - ), - additional_properties: Some(true), - values: Some(Box::new(super::Schema { - type_: Some("uint32".to_owned()), - ..Default::default() - })), - discriminator: Some("foo".to_owned()), - mapping: Some( - vec![( - "foo".to_owned(), - super::Schema { - type_: Some("uint32".to_owned()), - ..Default::default() - } - )] - .into_iter() - .collect() - ), - metadata: Some(vec![("foo".to_owned(), json!("bar"))].into_iter().collect()), - }, - serde_json::from_value(json!({ - "definitions": { - "foo": { - "type": "uint32", - }, - }, - "nullable": true, - "ref": "foo", - "type": "uint32", - "enum": ["foo", "bar"], - "elements": { - "type": "uint32", - }, - "properties": { - "foo": { - "type": "uint32", - }, - }, - "optionalProperties": { - "foo": { - "type": "uint32", - }, - }, - "additionalProperties": true, - "values": { - "type": "uint32", - }, - "discriminator": "foo", - "mapping": { - "foo": { - "type": "uint32", - }, - }, - "metadata": { - "foo": "bar", - }, - })) - .unwrap() - ); - } - - #[test] - fn from_empty() { - assert_roundtrip_try_into_from(json!({})); - } - - #[test] - fn from_ref() { - assert_roundtrip_try_into_from(json!({"ref": "foo"})); - assert_roundtrip_try_into_from(json!({"ref": "foo", "nullable": true})); - } - - #[test] - fn from_type() { - assert_roundtrip_try_into_from(json!({"type": "boolean"})); - assert_roundtrip_try_into_from(json!({"type": "boolean", "nullable": true})); - - 
assert_roundtrip_try_into_from(json!({"type": "int8"})); - assert_roundtrip_try_into_from(json!({"type": "uint8"})); - assert_roundtrip_try_into_from(json!({"type": "int16"})); - assert_roundtrip_try_into_from(json!({"type": "uint16"})); - assert_roundtrip_try_into_from(json!({"type": "int32"})); - assert_roundtrip_try_into_from(json!({"type": "uint32"})); - assert_roundtrip_try_into_from(json!({"type": "string"})); - assert_roundtrip_try_into_from(json!({"type": "timestamp"})); - } - - #[test] - fn from_enum() { - assert_roundtrip_try_into_from(json!({ "enum": ["foo"] })); - assert_roundtrip_try_into_from(json!({ "enum": ["foo"], "nullable": true })); - } - - #[test] - fn from_elements() { - assert_roundtrip_try_into_from(json!({ "elements": { "type": "boolean" } })); - assert_roundtrip_try_into_from( - json!({ "elements": { "type": "boolean" }, "nullable": true }), - ); - } - - #[test] - fn from_properties() { - assert_roundtrip_try_into_from(json!({ "properties": { "foo": { "type": "boolean" }}})); - assert_roundtrip_try_into_from( - json!({ "optionalProperties": { "foo": { "type": "boolean" }}}), - ); - assert_roundtrip_try_into_from( - json!({ "properties": { "foo": { "type": "boolean" }}, "nullable": true }), - ); - assert_roundtrip_try_into_from( - json!({ "optionalProperties": { "foo": { "type": "boolean" }}, "nullable": true }), - ); - assert_roundtrip_try_into_from(json!({ - "properties": { "foo": { "type": "boolean" }}, - "optionalProperties": { "bar": { "type": "boolean" }}, - })); - assert_roundtrip_try_into_from(json!({ - "properties": { "foo": { "type": "boolean" }}, - "optionalProperties": { "bar": { "type": "boolean" }}, - "nullable": true, - })); - } - - #[test] - fn from_values() { - assert_roundtrip_try_into_from(json!({ "values": { "type": "boolean" } })); - assert_roundtrip_try_into_from( - json!({ "values": { "type": "boolean" }, "nullable": true }), - ); - } - - #[test] - fn from_discriminator() { - assert_roundtrip_try_into_from(json!({ - 
"discriminator": "foo", - "mapping": { - "foo": { - "properties": { "bar": { "type": "boolean" }}, - }, - }, - })); - - assert_roundtrip_try_into_from(json!({ - "discriminator": "foo", - "mapping": { - "foo": { - "properties": { "bar": { "type": "boolean" }} - } - }, - "nullable": true, - })); - } - - fn assert_roundtrip_try_into_from(json: serde_json::Value) { - use crate::schema; - use std::convert::TryInto; - - let serde_schema: super::Schema = serde_json::from_value(json).unwrap(); - let schema: schema::Schema = serde_schema.clone().try_into().unwrap(); - - assert_eq!(serde_schema, schema.into()); - } -} diff --git a/src/serde_schema.rs b/src/serde_schema.rs new file mode 100644 index 0000000..d77b50b --- /dev/null +++ b/src/serde_schema.rs @@ -0,0 +1,47 @@ +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::BTreeMap; + +#[derive(Clone, Serialize, Deserialize, Debug, Default, PartialEq)] +#[serde(rename_all = "camelCase")] +#[serde(deny_unknown_fields)] +pub struct SerdeSchema { + #[serde(skip_serializing_if = "Option::is_none")] + pub definitions: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub nullable: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub ref_: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub type_: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub enum_: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub elements: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub properties: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub optional_properties: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub additional_properties: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub values: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub discriminator: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub mapping: Option>, + + 
#[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option>, +} diff --git a/src/validate.rs b/src/validate.rs new file mode 100644 index 0000000..eb06e25 --- /dev/null +++ b/src/validate.rs @@ -0,0 +1,424 @@ +use crate::{Schema, Type}; +use chrono::DateTime; +use serde_json::Value; +use thiserror::Error; + +#[derive(Default)] +pub struct ValidateOptions { + max_depth: usize, + max_errors: usize, +} + +impl ValidateOptions { + pub fn new() -> Self { + Self::default() + } + + pub fn with_max_depth(mut self, max_depth: usize) -> Self { + self.max_depth = max_depth; + self + } + + pub fn with_max_errors(mut self, max_errors: usize) -> Self { + self.max_errors = max_errors; + self + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Error)] +pub enum ValidateError { + #[error("max depth exceeded")] + MaxDepthExceeded, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ValidationErrorIndicator { + pub instance_path: Vec, + pub schema_path: Vec, +} + +pub fn validate( + schema: &Schema, + instance: &Value, + options: ValidateOptions, +) -> Result, ValidateError> { + let mut vm = Vm { + max_depth: options.max_depth, + max_errors: options.max_errors, + instance_tokens: vec![], + schema_tokens: vec![vec![]], + errors: vec![], + }; + + match vm.validate(schema, schema, None, instance) { + Ok(()) | Err(VmValidateError::MaxErrorsReached) => Ok(vm.errors), + Err(VmValidateError::MaxDepthExceeded) => Err(ValidateError::MaxDepthExceeded), + } +} + +struct Vm { + pub max_depth: usize, + pub max_errors: usize, + pub instance_tokens: Vec, + pub schema_tokens: Vec>, + pub errors: Vec, +} + +enum VmValidateError { + MaxErrorsReached, + MaxDepthExceeded, +} + +impl Vm { + pub fn validate( + &mut self, + root: &Schema, + schema: &Schema, + parent_tag: Option<&str>, + instance: &Value, + ) -> Result<(), VmValidateError> { + if instance.is_null() && schema.nullable() { + return Ok(()); + } + + match schema { + Schema::Empty { .. } => {} + Schema::Ref { ref_, .. 
} => { + self.schema_tokens + .push(vec!["definitions".to_owned(), ref_.clone()]); + if self.schema_tokens.len() == self.max_depth { + return Err(VmValidateError::MaxDepthExceeded); + } + + self.validate(root, &root.definitions()[ref_], None, instance)?; + self.schema_tokens.pop(); + } + Schema::Type { type_, .. } => { + self.push_schema_token("type"); + + match type_ { + Type::Boolean => { + if !instance.is_boolean() { + self.push_error()?; + } + } + Type::Float32 | Type::Float64 => { + if !instance.is_f64() && !instance.is_i64() { + self.push_error()?; + } + } + Type::Int8 => self.validate_int(instance, -128.0, 127.0)?, + Type::Uint8 => self.validate_int(instance, 0.0, 255.0)?, + Type::Int16 => self.validate_int(instance, -32768.0, 32767.0)?, + Type::Uint16 => self.validate_int(instance, 0.0, 65535.0)?, + Type::Int32 => self.validate_int(instance, -2147483648.0, 2147483647.0)?, + Type::Uint32 => self.validate_int(instance, 0.0, 4294967295.0)?, + Type::String => { + if !instance.is_string() { + self.push_error()?; + } + } + Type::Timestamp => { + if let Some(s) = instance.as_str() { + if DateTime::parse_from_rfc3339(s).is_err() { + self.push_error()?; + } + } else { + self.push_error()?; + } + } + }; + + self.pop_schema_token(); + } + Schema::Enum { enum_, .. } => { + self.push_schema_token("enum"); + if let Some(s) = instance.as_str() { + if !enum_.contains(s) { + self.push_error()?; + } + } else { + self.push_error()?; + } + self.pop_schema_token(); + } + Schema::Elements { elements, .. } => { + self.push_schema_token("elements"); + + if let Some(arr) = instance.as_array() { + for (i, sub_instance) in arr.iter().enumerate() { + self.push_instance_token(&i.to_string()); + self.validate(root, elements, None, sub_instance)?; + self.pop_instance_token(); + } + } else { + self.push_error()?; + } + + self.pop_schema_token(); + } + Schema::Properties { + properties, + optional_properties, + properties_is_present, + additional_properties, + .. 
+ } => { + if let Some(obj) = instance.as_object() { + self.push_schema_token("properties"); + for (name, sub_schema) in properties { + self.push_schema_token(name); + if let Some(sub_instance) = obj.get(name) { + self.push_instance_token(name); + self.validate(root, sub_schema, None, sub_instance)?; + self.pop_instance_token(); + } else { + self.push_error()?; + } + self.pop_schema_token(); + } + self.pop_schema_token(); + + self.push_schema_token("optionalProperties"); + for (name, sub_schema) in optional_properties { + self.push_schema_token(name); + if let Some(sub_instance) = obj.get(name) { + self.push_instance_token(name); + self.validate(root, sub_schema, None, sub_instance)?; + self.pop_instance_token(); + } + self.pop_schema_token(); + } + self.pop_schema_token(); + + if !*additional_properties { + for name in obj.keys() { + if parent_tag != Some(name) + && !properties.contains_key(name) + && !optional_properties.contains_key(name) + { + self.push_instance_token(name); + self.push_error()?; + self.pop_instance_token(); + } + } + } + } else { + self.push_schema_token(if *properties_is_present { + "properties" + } else { + "optionalProperties" + }); + self.push_error()?; + self.pop_schema_token(); + } + } + Schema::Values { values, .. } => { + self.push_schema_token("values"); + + if let Some(obj) = instance.as_object() { + for (name, sub_instance) in obj { + self.push_instance_token(name); + self.validate(root, values, None, sub_instance)?; + self.pop_instance_token(); + } + } else { + self.push_error()?; + } + + self.pop_schema_token(); + } + Schema::Discriminator { + discriminator, + mapping, + .. 
+ } => { + if let Some(obj) = instance.as_object() { + if let Some(tag) = obj.get(discriminator) { + if let Some(tag) = tag.as_str() { + if let Some(schema) = mapping.get(tag) { + self.push_schema_token("mapping"); + self.push_schema_token(tag); + self.validate(root, schema, Some(discriminator), instance)?; + self.pop_schema_token(); + self.pop_schema_token(); + } else { + self.push_schema_token("mapping"); + self.push_instance_token(discriminator); + self.push_error()?; + self.pop_instance_token(); + self.pop_schema_token(); + } + } else { + self.push_schema_token("discriminator"); + self.push_instance_token(discriminator); + self.push_error()?; + self.pop_instance_token(); + self.pop_schema_token(); + } + } else { + self.push_schema_token("discriminator"); + self.push_error()?; + self.pop_schema_token(); + } + } else { + self.push_schema_token("discriminator"); + self.push_error()?; + self.pop_schema_token(); + } + } + }; + + Ok(()) + } + + fn validate_int( + &mut self, + instance: &Value, + min: f64, + max: f64, + ) -> Result<(), VmValidateError> { + if let Some(val) = instance.as_f64() { + if val.fract() != 0.0 || val < min || val > max { + self.push_error() + } else { + Ok(()) + } + } else { + self.push_error() + } + } + + fn push_error(&mut self) -> Result<(), VmValidateError> { + self.errors.push(ValidationErrorIndicator { + instance_path: self.instance_tokens.clone(), + schema_path: self.schema_tokens.last().unwrap().clone(), + }); + + if self.max_errors == self.errors.len() { + Err(VmValidateError::MaxErrorsReached) + } else { + Ok(()) + } + } + + fn push_schema_token(&mut self, token: &str) { + self.schema_tokens + .last_mut() + .unwrap() + .push(token.to_owned()); + } + + fn pop_schema_token(&mut self) { + self.schema_tokens.last_mut().unwrap().pop().unwrap(); + } + + fn push_instance_token(&mut self, token: &str) { + self.instance_tokens.push(token.to_owned()); + } + + fn pop_instance_token(&mut self) { + self.instance_tokens.pop().unwrap(); + } +} + 
+#[cfg(test)] +mod tests { + #[test] + fn max_depth() { + use serde_json::json; + + let schema = crate::Schema::from_serde_schema( + serde_json::from_value(json!({ + "definitions": { + "loop": { "ref": "loop" }, + }, + "ref": "loop", + })) + .unwrap(), + ) + .unwrap(); + + assert_eq!( + super::ValidateError::MaxDepthExceeded, + super::validate( + &schema, + &json!(null), + super::ValidateOptions::new().with_max_depth(3) + ) + .unwrap_err() + ) + } + + #[test] + fn max_errors() { + use serde_json::json; + + let schema = crate::Schema::from_serde_schema( + serde_json::from_value(json!({ + "elements": { "type": "string" } + })) + .unwrap(), + ) + .unwrap(); + + assert_eq!( + 3, + super::validate( + &schema, + &json!([null, null, null, null, null]), + super::ValidateOptions::new().with_max_errors(3) + ) + .unwrap() + .len() + ) + } + + #[test] + fn validation_spec() { + use std::collections::{BTreeMap, HashSet}; + + #[derive(serde::Deserialize, PartialEq, Debug, Eq, Hash)] + struct TestCaseError { + #[serde(rename = "instancePath")] + instance_path: Vec, + + #[serde(rename = "schemaPath")] + schema_path: Vec, + } + + #[derive(serde::Deserialize)] + struct TestCase { + schema: crate::SerdeSchema, + instance: serde_json::Value, + errors: Vec, + } + + let test_cases: BTreeMap = + serde_json::from_str(include_str!("../json-typedef-spec/tests/validation.json")) + .expect("parse validation.json"); + + for (test_case_name, test_case) in test_cases { + let schema = crate::Schema::from_serde_schema(test_case.schema).expect(&test_case_name); + schema.validate().expect(&test_case_name); + + let errors: HashSet<_> = + super::validate(&schema, &test_case.instance, super::ValidateOptions::new()) + .expect(&test_case_name) + .into_iter() + .map(|err| TestCaseError { + instance_path: err.instance_path, + schema_path: err.schema_path, + }) + .collect(); + + let test_case_errors: HashSet<_> = test_case.errors.into_iter().collect(); + + assert_eq!( + test_case_errors, errors, + "wrong 
validation errors returned: {}", + &test_case_name + ); + } + } +} diff --git a/src/validator.rs b/src/validator.rs deleted file mode 100644 index f71f12b..0000000 --- a/src/validator.rs +++ /dev/null @@ -1,436 +0,0 @@ -use crate::form; -use crate::schema::Schema; -use chrono::DateTime; -use serde_json::Value; - -#[derive(Debug)] -pub struct Validator { - pub max_depth: Option, - pub max_errors: Option, -} - -#[derive(Debug, PartialEq)] -pub struct ValidationError { - pub instance_path: Vec, - pub schema_path: Vec, -} - -#[derive(Debug, PartialEq)] -pub enum ValidateError { - MaxDepthExceeded, -} - -impl Validator { - pub fn validate( - &self, - schema: &Schema, - instance: &Value, - ) -> Result, ValidateError> { - let mut vm = Vm { - max_depth: self.max_depth, - max_errors: self.max_errors, - instance_tokens: vec![], - schema_tokens: vec![vec![]], - errors: vec![], - }; - - match vm.validate(schema, schema, None, instance) { - Ok(()) | Err(VmValidateError::MaxErrorsReached) => Ok(vm.errors), - Err(VmValidateError::MaxDepthExceeded) => Err(ValidateError::MaxDepthExceeded), - } - } -} - -struct Vm { - pub max_depth: Option, - pub max_errors: Option, - pub instance_tokens: Vec, - pub schema_tokens: Vec>, - pub errors: Vec, -} - -enum VmValidateError { - MaxErrorsReached, - MaxDepthExceeded, -} - -impl Vm { - pub fn validate( - &mut self, - root: &Schema, - schema: &Schema, - parent_tag: Option<&str>, - instance: &Value, - ) -> Result<(), VmValidateError> { - match &schema.form { - form::Form::Empty => {} - form::Form::Ref(form::Ref { - nullable, - definition, - }) => { - if !*nullable || !instance.is_null() { - if self.max_depth == Some(self.schema_tokens.len()) { - return Err(VmValidateError::MaxDepthExceeded); - } - - self.schema_tokens - .push(vec!["definitions".to_owned(), definition.clone()]); - self.validate(root, &root.definitions[definition], None, instance)?; - self.schema_tokens.pop(); - } - } - form::Form::Type(form::Type { - nullable, - type_value, - }) 
=> { - if !*nullable || !instance.is_null() { - self.push_schema_token("type"); - - match type_value { - form::TypeValue::Boolean => { - if !instance.is_boolean() { - self.push_error()?; - } - } - form::TypeValue::Float32 | form::TypeValue::Float64 => { - if !instance.is_f64() && !instance.is_i64() { - self.push_error()?; - } - } - form::TypeValue::Int8 => self.validate_int(instance, -128.0, 127.0)?, - form::TypeValue::Uint8 => self.validate_int(instance, 0.0, 255.0)?, - form::TypeValue::Int16 => self.validate_int(instance, -32768.0, 32767.0)?, - form::TypeValue::Uint16 => self.validate_int(instance, 0.0, 65535.0)?, - form::TypeValue::Int32 => { - self.validate_int(instance, -2147483648.0, 2147483647.0)? - } - form::TypeValue::Uint32 => { - self.validate_int(instance, 0.0, 4294967295.0)? - } - form::TypeValue::String => { - if !instance.is_string() { - self.push_error()?; - } - } - form::TypeValue::Timestamp => { - if let Some(s) = instance.as_str() { - if DateTime::parse_from_rfc3339(s).is_err() { - self.push_error()?; - } - } else { - self.push_error()?; - } - } - }; - - self.pop_schema_token(); - } - } - form::Form::Enum(form::Enum { nullable, values }) => { - if !*nullable || !instance.is_null() { - self.push_schema_token("enum"); - if let Some(s) = instance.as_str() { - if !values.contains(s) { - self.push_error()?; - } - } else { - self.push_error()?; - } - self.pop_schema_token(); - } - } - form::Form::Elements(form::Elements { nullable, schema }) => { - if !*nullable || !instance.is_null() { - self.push_schema_token("elements"); - - if let Some(arr) = instance.as_array() { - for (i, sub_instance) in arr.iter().enumerate() { - self.push_instance_token(&i.to_string()); - self.validate(root, schema, None, sub_instance)?; - self.pop_instance_token(); - } - } else { - self.push_error()?; - } - - self.pop_schema_token(); - } - } - form::Form::Properties(form::Properties { - nullable, - required, - optional, - additional, - has_required, - }) => { - if !*nullable 
|| !instance.is_null() { - if let Some(obj) = instance.as_object() { - self.push_schema_token("properties"); - for (name, sub_schema) in required { - self.push_schema_token(name); - if let Some(sub_instance) = obj.get(name) { - self.push_instance_token(name); - self.validate(root, sub_schema, None, sub_instance)?; - self.pop_instance_token(); - } else { - self.push_error()?; - } - self.pop_schema_token(); - } - self.pop_schema_token(); - - self.push_schema_token("optionalProperties"); - for (name, sub_schema) in optional { - self.push_schema_token(name); - if let Some(sub_instance) = obj.get(name) { - self.push_instance_token(name); - self.validate(root, sub_schema, None, sub_instance)?; - self.pop_instance_token(); - } - self.pop_schema_token(); - } - self.pop_schema_token(); - - if !*additional { - for name in obj.keys() { - if parent_tag != Some(name) - && !required.contains_key(name) - && !optional.contains_key(name) - { - self.push_instance_token(name); - self.push_error()?; - self.pop_instance_token(); - } - } - } - } else { - self.push_schema_token(if *has_required { - "properties" - } else { - "optionalProperties" - }); - self.push_error()?; - self.pop_schema_token(); - } - } - } - form::Form::Values(form::Values { nullable, schema }) => { - if !*nullable || !instance.is_null() { - self.push_schema_token("values"); - - if let Some(obj) = instance.as_object() { - for (name, sub_instance) in obj { - self.push_instance_token(name); - self.validate(root, schema, None, sub_instance)?; - self.pop_instance_token(); - } - } else { - self.push_error()?; - } - - self.pop_schema_token(); - } - } - form::Form::Discriminator(form::Discriminator { - nullable, - discriminator, - mapping, - }) => { - if !*nullable || !instance.is_null() { - if let Some(obj) = instance.as_object() { - if let Some(tag) = obj.get(discriminator) { - if let Some(tag) = tag.as_str() { - if let Some(schema) = mapping.get(tag) { - self.push_schema_token("mapping"); - self.push_schema_token(tag); - 
self.validate(root, schema, Some(discriminator), instance)?; - self.pop_schema_token(); - self.pop_schema_token(); - } else { - self.push_schema_token("mapping"); - self.push_instance_token(discriminator); - self.push_error()?; - self.pop_instance_token(); - self.pop_schema_token(); - } - } else { - self.push_schema_token("discriminator"); - self.push_instance_token(discriminator); - self.push_error()?; - self.pop_instance_token(); - self.pop_schema_token(); - } - } else { - self.push_schema_token("discriminator"); - self.push_error()?; - self.pop_schema_token(); - } - } else { - self.push_schema_token("discriminator"); - self.push_error()?; - self.pop_schema_token(); - } - } - } - }; - - Ok(()) - } - - fn validate_int( - &mut self, - instance: &Value, - min: f64, - max: f64, - ) -> Result<(), VmValidateError> { - if let Some(val) = instance.as_f64() { - if val.fract() != 0.0 || val < min || val > max { - self.push_error() - } else { - Ok(()) - } - } else { - self.push_error() - } - } - - fn push_error(&mut self) -> Result<(), VmValidateError> { - self.errors.push(ValidationError { - instance_path: self.instance_tokens.clone(), - schema_path: self.schema_tokens.last().unwrap().clone(), - }); - - if self.max_errors == Some(self.errors.len()) { - Err(VmValidateError::MaxErrorsReached) - } else { - Ok(()) - } - } - - fn push_schema_token(&mut self, token: &str) { - self.schema_tokens - .last_mut() - .unwrap() - .push(token.to_owned()); - } - - fn pop_schema_token(&mut self) { - self.schema_tokens.last_mut().unwrap().pop().unwrap(); - } - - fn push_instance_token(&mut self, token: &str) { - self.instance_tokens.push(token.to_owned()); - } - - fn pop_instance_token(&mut self) { - self.instance_tokens.pop().unwrap(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::SerdeSchema; - use serde::{Deserialize, Serialize}; - use serde_json::json; - use std::collections::{BTreeMap, BTreeSet}; - use std::convert::TryInto; - - #[test] - fn max_depth() { - let 
schema: Schema = serde_json::from_value::(json!({ - "definitions": { - "loop": { "ref": "loop" }, - }, - "ref": "loop", - })) - .unwrap() - .try_into() - .unwrap(); - - let validator = Validator { - max_depth: Some(3), - max_errors: None, - }; - assert_eq!( - Err(ValidateError::MaxDepthExceeded), - validator.validate(&schema, &json!(null)) - ); - } - - #[test] - fn max_errors() { - let schema: Schema = serde_json::from_value::(json!({ - "elements": { - "type": "string", - }, - })) - .unwrap() - .try_into() - .unwrap(); - - let validator = Validator { - max_depth: None, - max_errors: Some(3), - }; - assert_eq!( - 3, - validator - .validate(&schema, &json!([null, null, null, null, null])) - .unwrap() - .len(), - ); - } - - #[test] - fn spec_validation_suite() { - #[derive(Serialize, Deserialize)] - struct TestCase { - schema: crate::serde::Schema, - instance: Value, - errors: Vec, - } - - #[derive(Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Debug)] - #[serde(rename_all = "camelCase")] - struct TestCaseError { - instance_path: Vec, - schema_path: Vec, - } - - let test_cases: BTreeMap = - serde_json::from_str(include_str!("../json-typedef-spec/tests/validation.json")) - .unwrap(); - - for (name, test_case) in test_cases { - let schema: Schema = test_case - .schema - .try_into() - .expect(&format!("parsing schema: {}", name)); - - schema.validate().expect(&format!("validating schema: {}", name)); - - let validator = Validator { - max_depth: None, - max_errors: None, - }; - - let errors: BTreeSet<_> = validator - .validate(&schema, &test_case.instance) - .expect(&format!("validating: {}", name)) - .into_iter() - .map(|err| TestCaseError { - instance_path: err.instance_path, - schema_path: err.schema_path, - }) - .collect(); - - assert_eq!( - test_case.errors.into_iter().collect::>(), - errors, - "wrong set of errors returned for test case: {}", - name - ); - } - } -} From fc13da22bff00e41de64b7335771e74910762916 Mon Sep 17 00:00:00 2001 From: Ulysse Carion 
Date: Fri, 22 Jan 2021 11:29:39 -0800 Subject: [PATCH 3/7] Use Cow, document public members --- src/lib.rs | 174 ++++++++++++++- src/schema.rs | 514 ++++++++++++++++++++++++++++++++++++++++++++ src/serde_schema.rs | 20 +- src/validate.rs | 294 ++++++++++++++++++++----- 4 files changed, 941 insertions(+), 61 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2d12f51..ca5c1c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,168 @@ -// pub mod form; -// pub mod schema; -// pub mod serde; -// pub mod validator; - -// pub use crate::serde::Schema as SerdeSchema; -// pub use form::Form; -// pub use schema::Schema; -// pub use validator::{ValidationError, Validator}; +//! An implementation of [JSON Type Definition](https://jsontypedef.com), [RFC +//! 8927](https://tools.ietf.org/html/rfc8927). +//! +//! `jtd` lets you parse and ensure the validity of JSON Typedef schemas, and +//! then validate JSON data against those schemas. If your goal is instead to +//! generate Rust types from JSON Typedef schemas, see +//! [`jtd-codegen`](https://github.com/jsontypedef/json-typedef-codegen). +//! +//! # Quick start +//! +//! Here's how you can parse a JSON Typedef schema and then use it to validate +//! data against that schema. +//! +//! ``` +//! use jtd::Schema; +//! use serde_json::json; +//! +//! let schema = Schema::from_serde_schema( +//! serde_json::from_value(json!({ +//! "properties": { +//! "foo": { "type": "string" }, +//! "bar": { "type": "boolean" } +//! } +//! })) +//! .expect("Parse schema"), +//! ) +//! .expect("Construct schema from JSON data"); +//! +//! schema.validate().expect("Invalid schema"); +//! +//! // This input is ok, so validate comes back empty. +//! let input_ok = json!({ "foo": "xxx", "bar": true }); +//! assert!(jtd::validate(&schema, &input_ok, Default::default()).unwrap().is_empty()); +//! +//! // This input is bad (bar has type string, not boolean), so validate does +//! // not come back empty. +//! 
let input_bad = json!({ "foo": "xxx", "bar": "false" }); +//! assert!(!jtd::validate(&schema, &input_bad, Default::default()).unwrap().is_empty()); +//! ``` +//! +//! Or, at a high level: +//! +//! 1. Use `serde_json` to parse JSON data into a [`SerdeSchema`]. +//! 2. Convert that into a [`Schema`] using [`Schema::from_serde_schema`]. +//! 3. Optionally, ensure that schema is "valid" using [`Schema::validate`]. +//! 4. Verify data against that schema using [`validate()`]. +//! +//! # Common usage +//! +//! The example above shows you how you can quickly use JSON Typedef to check +//! whether data is valid. But in the real world, you usually want to know what +//! the validation errors were, rather than just flatly rejecting input as +//! "invalid" without any further details. +//! +//! One benefit of JSON Type Definition is that the exact data inside the +//! validation errors is part of the specification; that means validation errors +//! are portable. Here's an example of what those validation errors look like, +//! and how you can access them with this crate. +//! +//! ``` +//! use jtd::{Schema, ValidationErrorIndicator}; +//! use serde_json::json; +//! +//! let schema = Schema::from_serde_schema( +//! serde_json::from_value(json!({ +//! "properties": { +//! "name": { "type": "string" }, +//! "age": { "type": "uint32" }, +//! "phones": { +//! "elements": { +//! "type": "string" +//! } +//! } +//! } +//! })) +//! .expect("Parse schema"), +//! ) +//! .expect("Construct schema from JSON data"); +//! +//! schema.validate().expect("Invalid schema"); +//! +//! // Since this first example is valid, we'll get back an empty list of +//! // validation errors. +//! let input_ok = json!({ +//! "name": "John Doe", +//! "age": 43, +//! "phones": ["+44 1234567", "+44 2345678"] +//! }); +//! +//! assert_eq!( +//! Vec::::new(), +//! jtd::validate(&schema, &input_ok, Default::default()).unwrap(), +//! ); +//! +//! 
// This example is invalid, so we'll get back three validation errors: +//! // +//! // 1. "name" is required but not present, +//! // 2. "age" has the wrong type +//! // 3. "phones[1]" has the wrong type +//! let input_bad = json!({ +//! "age": "43", +//! "phones": ["+44 1234567", 442345678] +//! }); +//! +//! // Each error indicator has two pieces of information: the path to the part +//! // of the input that was rejected (the "instance path"), and the part of the +//! // schema that rejected it (the "schema path"). +//! // +//! // The exact values of the instance path and schema path are specified in the +//! // JSON Type Definition spec. +//! assert_eq!( +//! vec![ +//! // "age" has the wrong type (required by "/properties/age/type") +//! ValidationErrorIndicator { +//! instance_path: vec!["age".into()], +//! schema_path: vec!["properties".into(), "age".into(), "type".into()], +//! }, +//! +//! // "name" is missing (required by "/properties/name") +//! ValidationErrorIndicator { +//! instance_path: vec![], +//! schema_path: vec!["properties".into(), "name".into()], +//! }, +//! +//! // "phones/1" has the wrong type (required by "/properties/phones/elements/type") +//! ValidationErrorIndicator { +//! instance_path: vec!["phones".into(), "1".into()], +//! schema_path: vec![ +//! "properties".into(), +//! "phones".into(), +//! "elements".into(), +//! "type".into() +//! ], +//! }, +//! ], +//! jtd::validate(&schema, &input_bad, Default::default()).unwrap(), +//! ); +//! ``` +//! +//! # Advanced usage +//! +//! The examples above skim over some details of how you can use this crate. +//! Here are pieces of documentation that you may find relevant: +//! +//! * If you want to convert JSON Type Definition schemas to/from JSON, and +//! validate whether a schema is valid, see [`SerdeSchema`], +//! [`Schema::from_serde_schema`], and [`Schema::validate`]. +//! +//! * If you want better performance out of [`validate()`], see +//! 
[`ValidateOptions`] to see how you can make validation faster. +//! +//! # Security considerations +//! +//! If you're running [`validate()`] with untrusted schemas (untrusted inputs is +//! fine), then be aware of this security consideration from RFC 8927: +//! +//! > Implementations that evaluate user-inputted schemas SHOULD implement +//! > mechanisms to detect and abort circular references that might cause a +//! > naive implementation to go into an infinite loop. Without such +//! > mechanisms, implementations may be vulnerable to denial-of-service +//! > attacks. +//! +//! This crate supports that "detect and abort" mechanism via +//! [`ValidateOptions::with_max_depth`]. Please see that documentation if you're +//! validating data against untrusted schemas. mod schema; mod serde_schema; diff --git a/src/schema.rs b/src/schema.rs index cfd4964..a24fb41 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -3,110 +3,515 @@ use serde_json::Value; use std::collections::{BTreeMap, BTreeSet}; use thiserror::Error; +/// A convenience alias for the JSON Typedef `definitions` keyword value. pub type Definitions = BTreeMap; + +/// A convenience alias for the JSON Typedef `metadata` keyword value. pub type Metadata = BTreeMap; +/// A pattern-matching-friendly representation of a JSON Typedef schema. +/// +/// Each variant of this schema corresponds to one of the eight "forms" a schema +/// may take on. All of the forms share the following fields: +/// +/// * `definitions` corresponds to the JSON Typedef keyword of the same name. +/// This should only be non-empty on root schemas. Otherwise, +/// [`Schema::validate`] will return +/// [`SchemaValidateError::NonRootDefinitions`]. +/// +/// * `metadata` corresponds to the JSON Typedef keyword of the same name. Use +/// this to convey information not pertinent to validation, such as hints for +/// code generation. 
Do not expect other parties to understand the fields +/// inside metadata unless you've agreed upon them out-of-band. +/// +/// Except for [`Schema::Empty`], all of the forms also share one additional +/// field: +/// +/// * `nullable` corresponds to the JSON Typedef keyword of the same name. If +/// set to "true", then regardless of any other considerations the schema will +/// accept JSON `null` as valid. +/// +/// [`Schema::Empty`] omits `nullable` because it's redundant; schemas of the +/// empty form already accept `null` anyway. +/// +/// For convenience, these three common properties have associated borrowing +/// "getters": [`Schema::definitions`], [`Schema::metadata`], and +/// [`Schema::nullable`]. +/// +/// If you are trying to parse a JSON Typedef schema from JSON, see +/// [`SerdeSchema`] and [`Schema::from_serde_schema`]. +/// +/// ``` +/// use jtd::{SerdeSchema, Schema}; +/// use serde_json::json; +/// +/// assert_eq!( +/// Schema::from_serde_schema(serde_json::from_value(json!({ +/// "elements": { +/// "type": "uint32", +/// "nullable": true +/// } +/// })).unwrap()).unwrap(), +/// jtd::Schema::Elements { +/// definitions: Default::default(), +/// metadata: Default::default(), +/// nullable: false, +/// elements: Box::new(jtd::Schema::Type { +/// definitions: Default::default(), +/// metadata: Default::default(), +/// nullable: true, +/// type_: jtd::Type::Uint32, +/// }) +/// } +/// ); +/// ``` #[derive(Clone, Debug, PartialEq)] pub enum Schema { + /// The [empty](https://tools.ietf.org/html/rfc8927#section-2.2.1) form. + /// + /// The empty form will accept all inputs. It corresponds to the "top" type + /// of many programming languages, like Java's `Object` or TypeScript's + /// `any`. Empty { definitions: Definitions, metadata: Metadata, }, + + /// The [ref](https://tools.ietf.org/html/rfc8927#section-2.2.2) form. + /// + /// The ref form accepts whatever the definition it refers to accepts. 
Ref { definitions: Definitions, metadata: Metadata, nullable: bool, + + /// The name of the definition being referred to. ref_: String, }, + + /// The [type](https://tools.ietf.org/html/rfc8927#section-2.2.3) form. + /// + /// The type form accepts JSON "primitives" (booleans, numbers, strings) + /// whose values fall within a certain "type". These types are enumerated in + /// [`Type`]. Type { definitions: Definitions, metadata: Metadata, nullable: bool, + + /// The type of primitive value accepted. type_: Type, }, + + /// The [enum](https://tools.ietf.org/html/rfc8927#section-2.2.4) form. + /// + /// The enum form accepts JSON strings whose values are within an enumerated + /// set. Enum { definitions: Definitions, metadata: Metadata, nullable: bool, + + /// The values the schema accepts. enum_: BTreeSet, }, + + /// The [elements](https://tools.ietf.org/html/rfc8927#section-2.2.5) form. + /// + /// The elements form accepts JSON arrays, and each element of the array is + /// validated against a sub-schema. Elements { definitions: Definitions, metadata: Metadata, nullable: bool, + + /// A schema for the elements of the array. elements: Box, }, + + /// The [properties](https://tools.ietf.org/html/rfc8927#section-2.2.6) + /// form. + /// + /// The properties form accepts JSON objects being used as "structs". Properties { definitions: Definitions, metadata: Metadata, nullable: bool, + + /// The required properties of the "struct", and the schema that each + /// must satisfy. properties: BTreeMap, + + /// The optional properties of the "struct", and the schema that each + /// must satisfy if present. optional_properties: BTreeMap, + + /// Whether the `properties` keyword is present on the schema. + /// + /// It is invalid to set this to `false` while having `properties` be + /// non-empty. 
+ /// + /// This is used only to handle the corner case of a properties-form + /// schema being used to validate a non-object; in order to ensure the + /// returned `schema_path` points to a part of the schema that really + /// exists, validators need to be able to tell the difference between + /// `properties` being an empty object versus being omitted from the + /// schema. + /// + /// This field does not affect whether an input is valid. It only + /// affects the `schema_path` that will be returned if that input is not + /// an object. For more details, see the first sub-bullet after + /// "Otherwise" in [RFC 8927, Section + /// 3.3.6](https://tools.ietf.org/html/rfc8927#section-3.3.6). + /// + /// [`Schema::from_serde_schema`] correctly handles populating this + /// field. If you are constructing schemas by hand and want to play it + /// safe, it is always safe to set this to `true`. properties_is_present: bool, + + /// Whether additional properties not specified in `properties` or + /// `optional_properties` are permitted. additional_properties: bool, }, + + /// The [values](https://tools.ietf.org/html/rfc8927#section-2.2.7) form. + /// + /// The values form accepts JSON objects being used as "dictionaries"; each + /// value of the dictionary is validated against a sub-schema. Values { definitions: Definitions, metadata: Metadata, nullable: bool, + + /// A schema for the values of the "dictionary" object. values: Box, }, + + /// The [discriminator](https://tools.ietf.org/html/rfc8927#section-2.2.8) + /// form. + /// + /// The discriminator form accepts JSON objects being used as "discriminated + /// unions", or "tagged unions". Discriminator { definitions: Definitions, metadata: Metadata, nullable: bool, + + /// The "discriminator" property of the schema. + /// + /// For an input to be valid, this property must exist and its value + /// must be a key in `mapping`. 
discriminator: String, + + /// A mapping from the value of the `discriminator` property in the + /// input to a schema that the rest of the input (without the + /// `discriminator` property) must satisfy. mapping: BTreeMap, }, } +/// The values [`Schema::Type::type_`] may take on. #[derive(Clone, Debug, PartialEq, Eq)] pub enum Type { + /// Either JSON `true` or `false`. Boolean, + + /// A JSON number with zero fractional part within the range of [`i8`]. Int8, + + /// A JSON number with zero fractional part within the range of [`u8`]. Uint8, + + /// A JSON number with zero fractional part within the range of [`i16`]. Int16, + + /// A JSON number with zero fractional part within the range of [`u16`]. Uint16, + + /// A JSON number with zero fractional part within the range of [`i32`]. Int32, + + /// A JSON number with zero fractional part within the range of [`u32`]. Uint32, + + /// A JSON number. Code generators will treat this like a Rust [`f32`]. Float32, + + /// A JSON number. Code generators will treat this like a Rust [`f64`]. Float64, + + /// A JSON string. String, + + /// A JSON string encoding a [RFC3339](https://tools.ietf.org/html/rfc3339) + /// timestamp. Timestamp, } +/// Errors that may arise from [`Schema::from_serde_schema`]. #[derive(Clone, Debug, PartialEq, Eq, Error)] pub enum FromSerdeSchemaError { + /// Indicates the schema uses an invalid combination of keywords. + /// + /// ``` + /// use jtd::{FromSerdeSchemaError, Schema, SerdeSchema}; + /// + /// assert_eq!( + /// Err(FromSerdeSchemaError::InvalidForm), + /// + /// // it's invalid to have both "type" and "enum" on a schema + /// Schema::from_serde_schema(SerdeSchema { + /// type_: Some("uint8".to_owned()), + /// enum_: Some(Default::default()), + /// ..Default::default() + /// }) + /// ) + /// ``` #[error("invalid combination of keywords in schema")] InvalidForm, + /// Indicates the schema uses a value for `type` that isn't in [`Type`]. 
+ /// + /// ``` + /// use jtd::{FromSerdeSchemaError, Schema, SerdeSchema}; + /// + /// assert_eq!( + /// Err(FromSerdeSchemaError::InvalidType("uint64".to_owned())), + /// + /// // there is no uint64 in JSON Typedef + /// Schema::from_serde_schema(SerdeSchema { + /// type_: Some("uint64".to_owned()), + /// ..Default::default() + /// }) + /// ) + /// ``` #[error("invalid type: {0:?}")] InvalidType(String), + /// Indicates the schema has the same value appearing twice in an `enum`. + /// + /// ``` + /// use jtd::{FromSerdeSchemaError, Schema, SerdeSchema}; + /// + /// assert_eq!( + /// Err(FromSerdeSchemaError::DuplicatedEnumValue("foo".to_owned())), + /// + /// // it's invalid to have the same value appear twice in an enum array + /// Schema::from_serde_schema(SerdeSchema { + /// enum_: Some(vec!["foo".into(), "bar".into(), "foo".into()]), + /// ..Default::default() + /// }) + /// ) + /// ``` #[error("duplicated enum value: {0:?}")] DuplicatedEnumValue(String), } +/// Errors that may arise from [`Schema::validate`]. #[derive(Clone, Debug, PartialEq, Eq, Error)] pub enum SchemaValidateError { + /// Indicates the schema has a `ref` to a definition that doesn't exist. + /// + /// ``` + /// use jtd::{Schema, SchemaValidateError}; + /// + /// assert_eq!( + /// Err(SchemaValidateError::NoSuchDefinition("foo".into())), + /// + /// // a "ref" without definitions is always invalid + /// Schema::Ref { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: Default::default(), + /// ref_: "foo".into(), + /// }.validate(), + /// ) + /// ``` #[error("no such definition: {0:?}")] NoSuchDefinition(String), + /// Indicates the schema has non-empty `definitions` below the root level. 
+ /// + /// ``` + /// use jtd::{Schema, SchemaValidateError}; + /// + /// assert_eq!( + /// Err(SchemaValidateError::NonRootDefinitions), + /// + /// // definitions can only be present at the root level + /// Schema::Elements { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: Default::default(), + /// elements: Box::new(Schema::Empty { + /// definitions: vec![( + /// "foo".to_owned(), + /// Schema::Empty { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// } + /// )].into_iter().collect(), + /// metadata: Default::default(), + /// }), + /// }.validate(), + /// ) + /// ``` #[error("non-root definitions")] NonRootDefinitions, + /// Indicates the schema has an `enum` with no values in it. + /// + /// ``` + /// use jtd::{Schema, SchemaValidateError}; + /// + /// assert_eq!( + /// Err(SchemaValidateError::EmptyEnum), + /// + /// // empty enums are illegal + /// Schema::Enum { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: Default::default(), + /// enum_: Default::default(), + /// }.validate(), + /// ) + /// ``` #[error("empty enum")] EmptyEnum, + /// Indicates the schema has the same property appear in `properties` and + /// `optional_properties`. 
+ /// + /// ``` + /// use jtd::{Schema, SchemaValidateError}; + /// + /// assert_eq!( + /// Err(SchemaValidateError::RepeatedProperty("foo".into())), + /// + /// // properties and optional_properties must not overlap + /// Schema::Properties { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: Default::default(), + /// properties: vec![( + /// "foo".to_owned(), + /// Schema::Empty { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// }, + /// )].into_iter().collect(), + /// optional_properties: vec![( + /// "foo".to_owned(), + /// Schema::Empty { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// }, + /// )].into_iter().collect(), + /// properties_is_present: true, + /// additional_properties: false, + /// }.validate(), + /// ) + /// ``` #[error("property repeated in optionalProperties: {0:?}")] RepeatedProperty(String), + /// Indicates the schema has a value in `mapping` with `nullable` set to + /// `true`. + /// + /// ``` + /// use jtd::{Schema, SchemaValidateError}; + /// + /// assert_eq!( + /// Err(SchemaValidateError::NullableMapping), + /// + /// // mappings must not be nullable + /// Schema::Discriminator { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: Default::default(), + /// discriminator: "foo".into(), + /// mapping: vec![( + /// "bar".to_owned(), + /// Schema::Properties { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: true, + /// properties: Default::default(), + /// optional_properties: Default::default(), + /// properties_is_present: true, + /// additional_properties: false, + /// } + /// )].into_iter().collect(), + /// }.validate(), + /// ); + /// ``` #[error("nullable schema in mapping")] NullableMapping, + /// Indicates the schema has a value in `mapping` that isn't a + /// [`Schema::Properties`]. 
+ /// + /// ``` + /// use jtd::{Schema, SchemaValidateError}; + /// + /// assert_eq!( + /// Err(SchemaValidateError::NonPropertiesMapping), + /// + /// // mappings must be of the properties form + /// Schema::Discriminator { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: Default::default(), + /// discriminator: "foo".into(), + /// mapping: vec![( + /// "bar".to_owned(), + /// Schema::Empty { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// } + /// )].into_iter().collect(), + /// }.validate(), + /// ); + /// ``` #[error("non-properties schema in mapping")] NonPropertiesMapping, + /// Indicates the schema has a value in `mapping` whose `properties` or + /// `optional_properties` contains `discriminator`. + /// + /// ``` + /// use jtd::{Schema, SchemaValidateError}; + /// + /// assert_eq!( + /// Err(SchemaValidateError::RepeatedDiscriminator("foo".into())), + /// + /// // mappings must not re-define the discriminator property + /// Schema::Discriminator { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: Default::default(), + /// discriminator: "foo".into(), + /// mapping: vec![( + /// "bar".to_owned(), + /// Schema::Properties { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: Default::default(), + /// properties: vec![( + /// "foo".into(), + /// Schema::Empty { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// } + /// )].into_iter().collect(), + /// optional_properties: Default::default(), + /// properties_is_present: true, + /// additional_properties: false, + /// } + /// )].into_iter().collect(), + /// }.validate(), + /// ); + /// ``` #[error("discriminator redefined in mapping: {0:?}")] RepeatedDiscriminator(String), } @@ -171,6 +576,27 @@ const VALID_FORM_SIGNATURES: [[bool; 10]; 13] = [ ]; impl Schema { + /// Constructs a [`Schema`] from a [`SerdeSchema`]. 
+ /// + /// ``` + /// use jtd::{Schema, SerdeSchema, Type}; + /// + /// assert_eq!( + /// Schema::Type { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: false, + /// type_: Type::Uint8, + /// }, + /// Schema::from_serde_schema(SerdeSchema { + /// type_: Some("uint8".to_owned()), + /// ..Default::default() + /// }).unwrap(), + /// ); + /// ``` + /// + /// See the documentation for [`FromSerdeSchemaError`] for examples of how + /// this function may return an error. pub fn from_serde_schema(serde_schema: SerdeSchema) -> Result { let mut definitions = BTreeMap::new(); for (name, sub_schema) in serde_schema.definitions.unwrap_or_default() { @@ -323,6 +749,23 @@ impl Schema { }) } + /// Ensures a [`Schema`] is well-formed. + /// + /// ``` + /// use jtd::{Schema, Type}; + /// + /// let schema = Schema::Type { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: false, + /// type_: Type::Uint8, + /// }; + /// + /// schema.validate().expect("Invalid schema"); + /// ``` + /// + /// See the documentation for [`SchemaValidateError`] for examples of how + /// this function may return an error. pub fn validate(&self) -> Result<(), SchemaValidateError> { self._validate(None) } @@ -416,6 +859,32 @@ impl Schema { Ok(()) } + /// Gets the schema's definitions. + /// + /// ``` + /// use jtd::{Definitions, Schema}; + /// + /// assert_eq!( + /// &vec![( + /// "foo".to_owned(), + /// Schema::Empty { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// }, + /// )].into_iter().collect::(), + /// + /// Schema::Empty { + /// definitions: vec![( + /// "foo".to_owned(), + /// Schema::Empty { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// }, + /// )].into_iter().collect(), + /// metadata: Default::default(), + /// }.definitions(), + /// ); + /// ``` pub fn definitions(&self) -> &BTreeMap { match self { Self::Empty { definitions, .. 
} => definitions, @@ -429,6 +898,27 @@ impl Schema { } } + /// Gets the schema's metadata. + /// + /// ``` + /// use jtd::{Metadata, Schema}; + /// use serde_json::json; + /// + /// assert_eq!( + /// &vec![( + /// "foo".to_owned(), + /// json!("bar"), + /// )].into_iter().collect::(), + /// + /// Schema::Empty { + /// definitions: Default::default(), + /// metadata: vec![( + /// "foo".to_owned(), + /// json!("bar"), + /// )].into_iter().collect(), + /// }.metadata(), + /// ); + /// ``` pub fn metadata(&self) -> &BTreeMap { match self { Self::Empty { metadata, .. } => metadata, @@ -442,6 +932,30 @@ impl Schema { } } + /// Gets whether the schema is nullable. + /// + /// For [`Schema::Empty`], this always returns true. For all other forms, + /// this fetches the `nullable` property. + /// + /// ``` + /// use jtd::{Schema, Type}; + /// + /// assert!( + /// Schema::Empty { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// }.nullable(), + /// ); + /// + /// assert!( + /// !Schema::Type { + /// definitions: Default::default(), + /// metadata: Default::default(), + /// nullable: false, + /// type_: Type::Uint8, + /// }.nullable(), + /// ); + /// ``` pub fn nullable(&self) -> bool { match self { Self::Empty { .. } => true, diff --git a/src/serde_schema.rs b/src/serde_schema.rs index d77b50b..4b1db98 100644 --- a/src/serde_schema.rs +++ b/src/serde_schema.rs @@ -2,10 +2,27 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::BTreeMap; +/// A JSON representation of JSON Typedef schemas, compatible with `serde_json`. +/// +/// To convert this into a [`Schema`][`crate::Schema`], see +/// [`Schema::from_serde_schema`][`crate::Schema::from_serde_schema`]. 
+/// +/// ``` +/// use jtd::SerdeSchema; +/// use serde_json::json; +/// +/// assert_eq!( +/// SerdeSchema { type_: Some("uint8".to_owned()), ..Default::default() }, +/// serde_json::from_value::(json!({ "type": "uint8" })).unwrap() +/// ) +/// ``` #[derive(Clone, Serialize, Deserialize, Debug, Default, PartialEq)] #[serde(rename_all = "camelCase")] #[serde(deny_unknown_fields)] pub struct SerdeSchema { + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option>, + #[serde(skip_serializing_if = "Option::is_none")] pub definitions: Option>, @@ -41,7 +58,4 @@ pub struct SerdeSchema { #[serde(skip_serializing_if = "Option::is_none")] pub mapping: Option>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub metadata: Option>, } diff --git a/src/validate.rs b/src/validate.rs index eb06e25..8d4341e 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,67 +1,247 @@ use crate::{Schema, Type}; use chrono::DateTime; use serde_json::Value; +use std::borrow::Cow; use thiserror::Error; -#[derive(Default)] +/// Options you can pass to [`validate()`]. +#[derive(Clone, Debug, Default, PartialEq, Eq)] pub struct ValidateOptions { max_depth: usize, max_errors: usize, } impl ValidateOptions { + /// Construct a new set of options with all default values. + /// + /// Equivalent to [`Default::default()`] or calling `with_max_depth(0)` and + /// `with_max_errors(0)`. pub fn new() -> Self { Self::default() } + /// Sets the maximum "depth" of references to follow in [`validate()`]. + /// + /// This option exists to handle the possibility of an infinite loop in a + /// schema. For instance, this is a valid schema: + /// + /// ```json + /// { "ref": "loop", "definitions": { "loop": { "ref": "loop" }}} + /// ``` + /// + /// There are good reasons to sometimes have self-referential schemas -- for + /// instance, to describe a recursive data structure. 
What `with_max_depth` + /// does is limit how many recursive `ref` nodes will be followed before + /// [`validate()`] errors with [`ValidateError::MaxDepthExceeded`]. + /// + /// The default max depth of `0` indicates that no max depth should be + /// implemented. An infinite `ref` loop will eventually overflow the stack + /// during [`validate()`]. pub fn with_max_depth(mut self, max_depth: usize) -> Self { self.max_depth = max_depth; self } + /// Sets the maximum number of validation errors to return from + /// [`validate()`]. + /// + /// This option exists as an optimization for [`validate()`]. If all you + /// care about is whether an input is valid, then consider using + /// `with_max_errors(1)` to have [`validate()`] immediately return after + /// finding a validation error. + /// + /// The default max errors of `0` indicates that all errors will be + /// returned. pub fn with_max_errors(mut self, max_errors: usize) -> Self { self.max_errors = max_errors; self } } +/// Errors that may arise from [`validate()`]. #[derive(Clone, Debug, PartialEq, Eq, Error)] pub enum ValidateError { + /// The maximum depth, as specified by [`ValidateOptions::with_max_depth`], + /// was exceeded. + /// + /// ``` + /// use serde_json::json; + /// use jtd::{Schema, ValidateError, ValidateOptions}; + /// + /// let schema = Schema::from_serde_schema( + /// serde_json::from_value(json!({ + /// "definitions": { + /// "loop": { "ref": "loop" }, + /// }, + /// "ref": "loop", + /// })) + /// .unwrap(), + /// ) + /// .unwrap(); + /// + /// assert_eq!( + /// ValidateError::MaxDepthExceeded, + /// jtd::validate( + /// &schema, + /// &json!(null), + /// ValidateOptions::new().with_max_depth(3) + /// ) + /// .unwrap_err() + /// ) + /// ``` #[error("max depth exceeded")] MaxDepthExceeded, } +/// A single validation error returned by [`validate()`]. +/// +/// This type has *Indicator* at the end of its name to emphasize that it is +/// *not* a Rust error. 
It is an ordinary struct, and corresponds to the concept +/// of a validation error indicator in the JSON Typedef specification. See +/// [RFC8927, Section 3.2](https://tools.ietf.org/html/rfc8927#section-3.2). +/// +/// In order to avoid unnecessary allocations, this struct uses +/// [`std::borrow::Cow`] instead of [`String`] directly. If you would prefer not +/// to have to deal with that, and are OK with copying all the data out of this +/// struct, then use +/// [`into_owned_paths`][`ValidationErrorIndicator::into_owned_paths`] to +/// convert instances of this type into a pair of plain old `Vec`s. #[derive(Clone, Debug, PartialEq, Eq)] -pub struct ValidationErrorIndicator { - pub instance_path: Vec, - pub schema_path: Vec, +pub struct ValidationErrorIndicator<'a> { + /// A path to the part of the instance that was rejected. + pub instance_path: Vec>, + + /// A path to the part of the schema that rejected the instance. + pub schema_path: Vec>, +} + +impl<'a> ValidationErrorIndicator<'a> { + /// Converts this struct into an `instance_path` and `schema_path` pair. + /// + /// This is a convenience function for those who don't want to manipulate + /// [`std::borrow::Cow`]. + /// + /// ``` + /// use std::borrow::Cow; + /// + /// let indicator = jtd::ValidationErrorIndicator { + /// instance_path: vec![Cow::Borrowed("foo")], + /// schema_path: vec![Cow::Owned("bar".to_owned())], + /// }; + /// + /// let (instance_path, schema_path) = indicator.into_owned_paths(); + /// assert_eq!(vec!["foo".to_owned()], instance_path); + /// assert_eq!(vec!["bar".to_owned()], schema_path); + /// ``` + pub fn into_owned_paths(self) -> (Vec, Vec) { + ( + self.instance_path + .into_iter() + .map(|c| c.into_owned()) + .collect(), + self.schema_path + .into_iter() + .map(|c| c.into_owned()) + .collect(), + ) + } } -pub fn validate( - schema: &Schema, - instance: &Value, +/// Validates an instance against a schema, returning a set of error indicators.
+/// +/// In keeping with the conventions of RFC8927, the "input" JSON -- the second +/// argument to this function -- is called an *instance*. +/// +/// The set of error indicators returned is specified by the JSON Typedef +/// specification. The ordering of those errors is not defined by the JSON +/// Typedef specification, and is subject to change in a future version of this +/// crate. +/// +/// ``` +/// use jtd::{Schema, ValidationErrorIndicator, ValidateOptions}; +/// use serde_json::json; +/// +/// let schema = Schema::from_serde_schema( +/// serde_json::from_value(json!({ +/// "elements": { +/// "type": "uint8" +/// } +/// })).unwrap()).unwrap(); +/// +/// let instance = serde_json::json!([ "a", "b", "c" ]); +/// +/// // By default, jtd::validate() will return all errors in the input. +/// let validate_options = ValidateOptions::new(); +/// let errors = jtd::validate(&schema, &instance, validate_options).unwrap(); +/// assert_eq!( +/// vec![ +/// ValidationErrorIndicator { +/// instance_path: vec!["0".to_owned().into()], +/// schema_path: vec!["elements".into(), "type".into()], +/// }, +/// ValidationErrorIndicator { +/// instance_path: vec!["1".to_owned().into()], +/// schema_path: vec!["elements".into(), "type".into()], +/// }, +/// ValidationErrorIndicator { +/// instance_path: vec!["2".to_owned().into()], +/// schema_path: vec!["elements".into(), "type".into()], +/// }, +/// ], +/// errors, +/// ); +/// +/// // If you don't care about validation errors beyond a certain amount of +/// // errors, use with_max_errors on the ValidateOptions you pass to validate. 
+/// let validate_options = ValidateOptions::new().with_max_errors(1); +/// let errors = jtd::validate(&schema, &instance, validate_options).unwrap(); +/// assert_eq!( +/// vec![ +/// ValidationErrorIndicator { +/// instance_path: vec!["0".to_owned().into()], +/// schema_path: vec!["elements".into(), "type".into()], +/// }, +/// ], +/// errors, +/// ); +/// ``` +/// +/// # Security considerations +/// +/// (This note is copied from [the top-level documentation][`crate`], because +/// it's important.) +/// +/// If you're running [`validate()`] with untrusted schemas (untrusted inputs are +/// fine), then be aware of this security consideration from RFC 8927: +/// +/// > Implementations that evaluate user-inputted schemas SHOULD implement +/// > mechanisms to detect and abort circular references that might cause a +/// > naive implementation to go into an infinite loop. Without such +/// > mechanisms, implementations may be vulnerable to denial-of-service +/// > attacks. +/// +/// This crate supports that "detect and abort" mechanism via +/// [`ValidateOptions::with_max_depth`]. Please see that documentation if you're +/// validating data against untrusted schemas.
+pub fn validate<'a>( + schema: &'a Schema, + instance: &'a Value, options: ValidateOptions, -) -> Result, ValidateError> { - let mut vm = Vm { - max_depth: options.max_depth, - max_errors: options.max_errors, - instance_tokens: vec![], - schema_tokens: vec![vec![]], - errors: vec![], - }; - - match vm.validate(schema, schema, None, instance) { - Ok(()) | Err(VmValidateError::MaxErrorsReached) => Ok(vm.errors), +) -> Result>, ValidateError> { + let mut vm = Vm::new(schema, options); + + match vm.validate(schema, None, instance) { + Ok(()) | Err(VmValidateError::MaxErrorsReached) => Ok(vm.into_errors()), Err(VmValidateError::MaxDepthExceeded) => Err(ValidateError::MaxDepthExceeded), } } -struct Vm { - pub max_depth: usize, - pub max_errors: usize, - pub instance_tokens: Vec, - pub schema_tokens: Vec>, - pub errors: Vec, +struct Vm<'a> { + root: &'a Schema, + options: ValidateOptions, + instance_tokens: Vec>, + schema_tokens: Vec>>, + errors: Vec>, } enum VmValidateError { @@ -69,13 +249,26 @@ enum VmValidateError { MaxDepthExceeded, } -impl Vm { +impl<'a> Vm<'a> { + pub fn new(schema: &'a Schema, options: ValidateOptions) -> Self { + Self { + root: schema, + options, + instance_tokens: vec![], + schema_tokens: vec![vec![]], + errors: vec![], + } + } + + pub fn into_errors(self) -> Vec> { + self.errors + } + pub fn validate( &mut self, - root: &Schema, - schema: &Schema, - parent_tag: Option<&str>, - instance: &Value, + schema: &'a Schema, + parent_tag: Option<&'a str>, + instance: &'a Value, ) -> Result<(), VmValidateError> { if instance.is_null() && schema.nullable() { return Ok(()); @@ -85,12 +278,13 @@ impl Vm { Schema::Empty { .. } => {} Schema::Ref { ref_, .. 
} => { self.schema_tokens - .push(vec!["definitions".to_owned(), ref_.clone()]); - if self.schema_tokens.len() == self.max_depth { + .push(vec!["definitions".into(), ref_.into()]); + + if self.schema_tokens.len() == self.options.max_depth { return Err(VmValidateError::MaxDepthExceeded); } - self.validate(root, &root.definitions()[ref_], None, instance)?; + self.validate(&self.root.definitions()[ref_], None, instance)?; self.schema_tokens.pop(); } Schema::Type { type_, .. } => { @@ -147,8 +341,12 @@ impl Vm { if let Some(arr) = instance.as_array() { for (i, sub_instance) in arr.iter().enumerate() { - self.push_instance_token(&i.to_string()); - self.validate(root, elements, None, sub_instance)?; + // This is the only case where we push a non-Borrowed + // instance token. We handle pushing to instance_tokens + // manually here, to keep push_instance_token simpler. + self.instance_tokens.push(Cow::Owned(i.to_string())); + + self.validate(elements, None, sub_instance)?; self.pop_instance_token(); } } else { @@ -170,7 +368,7 @@ impl Vm { self.push_schema_token(name); if let Some(sub_instance) = obj.get(name) { self.push_instance_token(name); - self.validate(root, sub_schema, None, sub_instance)?; + self.validate(sub_schema, None, sub_instance)?; self.pop_instance_token(); } else { self.push_error()?; @@ -184,7 +382,7 @@ impl Vm { self.push_schema_token(name); if let Some(sub_instance) = obj.get(name) { self.push_instance_token(name); - self.validate(root, sub_schema, None, sub_instance)?; + self.validate(sub_schema, None, sub_instance)?; self.pop_instance_token(); } self.pop_schema_token(); @@ -219,7 +417,7 @@ impl Vm { if let Some(obj) = instance.as_object() { for (name, sub_instance) in obj { self.push_instance_token(name); - self.validate(root, values, None, sub_instance)?; + self.validate(values, None, sub_instance)?; self.pop_instance_token(); } } else { @@ -239,7 +437,7 @@ impl Vm { if let Some(schema) = mapping.get(tag) { self.push_schema_token("mapping"); 
self.push_schema_token(tag); - self.validate(root, schema, Some(discriminator), instance)?; + self.validate(schema, Some(discriminator), instance)?; self.pop_schema_token(); self.pop_schema_token(); } else { @@ -295,26 +493,23 @@ impl Vm { schema_path: self.schema_tokens.last().unwrap().clone(), }); - if self.max_errors == self.errors.len() { + if self.options.max_errors == self.errors.len() { Err(VmValidateError::MaxErrorsReached) } else { Ok(()) } } - fn push_schema_token(&mut self, token: &str) { - self.schema_tokens - .last_mut() - .unwrap() - .push(token.to_owned()); + fn push_schema_token(&mut self, token: &'a str) { + self.schema_tokens.last_mut().unwrap().push(token.into()); } fn pop_schema_token(&mut self) { self.schema_tokens.last_mut().unwrap().pop().unwrap(); } - fn push_instance_token(&mut self, token: &str) { - self.instance_tokens.push(token.to_owned()); + fn push_instance_token(&mut self, token: &'a str) { + self.instance_tokens.push(token.into()); } fn pop_instance_token(&mut self) { @@ -406,9 +601,10 @@ mod tests { super::validate(&schema, &test_case.instance, super::ValidateOptions::new()) .expect(&test_case_name) .into_iter() - .map(|err| TestCaseError { - instance_path: err.instance_path, - schema_path: err.schema_path, + .map(|err| err.into_owned_paths()) + .map(|(instance_path, schema_path)| TestCaseError { + instance_path, + schema_path, }) .collect(); From 080d7efe7f43819b2a3c8ad1f89b32d60eb0a2a6 Mon Sep 17 00:00:00 2001 From: Ulysse Carion Date: Fri, 22 Jan 2021 11:44:20 -0800 Subject: [PATCH 4/7] Change publish to happen on GitHub release publication --- .github/workflows/publish.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 06d5c73..fec2345 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,7 +1,7 @@ on: - push: - branches: - - master + release: + types: [published] + jobs: publish: runs-on: ubuntu-latest 
From 6b389dfdf064ab43c29c9d077571a3ad0329fc82 Mon Sep 17 00:00:00 2001 From: Ulysse Carion Date: Fri, 22 Jan 2021 11:51:38 -0800 Subject: [PATCH 5/7] Add README --- README.md | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..4e1c4b0 --- /dev/null +++ b/README.md @@ -0,0 +1,132 @@ +# json-typedef-rust: A Rust implementation of JSON Typedef ![Crates.io](https://img.shields.io/crates/v/jtd) ![Docs.rs](https://docs.rs/jtd/badge.svg) + +[JSON Type Definition](https://jsontypedef.com), aka +[RFC8927](https://tools.ietf.org/html/rfc8927), is an easy-to-learn, +standardized way to define a schema for JSON data. You can use JSON Typedef to +portably validate data across programming languages, create dummy data, generate +code, and more. + +`jtd` is a Rust implementation of JSON Typedef. You can use this crate to parse +JSON Typedef schemas, validate JSON data against those schemas, or build your +own tooling on top of JSON Typedef. + +Here's an example of this crate in action: + +```rust +use jtd::{Schema, ValidationErrorIndicator}; +use serde_json::json; + +let schema = Schema::from_serde_schema( + serde_json::from_value(json!({ + "properties": { + "name": { "type": "string" }, + "age": { "type": "uint32" }, + "phones": { + "elements": { + "type": "string" + } + } + } + })).unwrap()).unwrap(); + +// Since this first example is valid, we'll get back an empty list of +// validation errors. +let input_ok = json!({ + "name": "John Doe", + "age": 43, + "phones": ["+44 1234567", "+44 2345678"] +}); + +assert_eq!( + Vec::::new(), + jtd::validate(&schema, &input_ok, Default::default()).unwrap(), +); + +// This example is invalid, so we'll get back three validation errors: +// +// 1. "name" is required but not present, +// 2. "age" has the wrong type +// 3.
"phones[1]" has the wrong type +let input_bad = json!({ + "age": "43", + "phones": ["+44 1234567", 442345678] +}); + +// Each error indicator has two pieces of information: the path to the part +// of the input that was rejected (the "instance path"), and the part of the +// schema that rejected it (the "schema path"). +// +// The exact values of the instance path and schema path are specified in the +// JSON Type Definition spec. +assert_eq!( + vec![ + // "age" has the wrong type (required by "/properties/age/type") + ValidationErrorIndicator { + instance_path: vec!["age".into()], + schema_path: vec!["properties".into(), "age".into(), "type".into()], + }, + + // "name" is missing (required by "/properties/name") + ValidationErrorIndicator { + instance_path: vec![], + schema_path: vec!["properties".into(), "name".into()], + }, + + // "phones/1" has the wrong type (required by "/properties/phones/elements/type") + ValidationErrorIndicator { + instance_path: vec!["phones".into(), "1".into()], + schema_path: vec![ + "properties".into(), + "phones".into(), + "elements".into(), + "type".into() + ], + }, + ], + jtd::validate(&schema, &input_bad, Default::default()).unwrap(), +); +``` + +## What is JSON Type Definition? + +[JSON Type Definition](https://jsontypedef.com) is a schema format for JSON +data. A JSON Type Definition schema describes what is and isn't a "valid" JSON +document. JSON Type Definition is easy to learn, portable (there are +functionally-identical implementations across many programming languages) and +standardized (the spec is set in stone as [IETF RFC +8927](https://tools.ietf.org/html/rfc8927)). + +Here's an example of a JSON Type Definition schema: + +```json +{ + "properties": { + "name": { + "type": "string" + }, + "isAdmin": { + "type": "boolean" + } + } +} +``` + +This schema considers any object with a `name` property (whose value must be a +string), an `isAdmin` property (whose value must be a boolean), and no other +properties, to be valid.
+ +To learn more about JSON Type Definition, [check out the online documentation at +jsontypedef.com](https://jsontypedef.com). + +## Installation + +Install this crate by adding the following to your `Cargo.toml`: + +```toml +jtd = "0.2" +``` + +## Usage + +For detailed documentation on how to use this crate, consult [the full API +documentation on docs.rs](https://docs.rs/jtd). From 390de460884549b97d384016bb2b79748b7164a7 Mon Sep 17 00:00:00 2001 From: Ulysse Carion Date: Fri, 22 Jan 2021 11:55:22 -0800 Subject: [PATCH 6/7] Refactor README title --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4e1c4b0..4d73ee0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# json-typedef-rust: A Rust implementation of JSON Typedef ![Crates.io](https://img.shields.io/crates/v/jtd) ![Docs.rs](https://docs.rs/jtd/badge.svg) +# jtd: JSON Typedef for Rust ![Crates.io](https://img.shields.io/crates/v/jtd) ![Docs.rs](https://docs.rs/jtd/badge.svg) [JSON Type Definition](https://jsontypedef.com), aka [RFC8927](https://tools.ietf.org/html/rfc8927), is an easy-to-learn, From aa8eed93319a7ccb2fcf1eba99cbd34dc5657a4d Mon Sep 17 00:00:00 2001 From: Ulysse Carion Date: Fri, 22 Jan 2021 11:59:37 -0800 Subject: [PATCH 7/7] Prepare for 0.3 --- Cargo.toml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4c99012..61137c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "jtd" -version = "0.2.1" +version = "0.3.0" description = "A Rust implementation of JSON Type Definition" authors = ["JSON Type Definition Contributors"] edition = "2018" diff --git a/README.md b/README.md index 4d73ee0..062d9eb 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ jsontypedef.com](https://jsontypedef.com). Install this crate by adding the following to your `Cargo.toml`: ```toml -jtd = "0.2" +jtd = "0.3" ``` ## Usage