From febaf26d52a7b752ef6a456b8bb5cc2c95fa6b06 Mon Sep 17 00:00:00 2001 From: David Harsha Date: Sat, 14 Oct 2023 12:53:35 -0700 Subject: [PATCH] Support custom content encodings and media types This exposes hooks for the `contentEncoding` and `contentMediaType` keywords, similar to the existing custom formats behavior. The provided callables must return a tuple consisting of a validation boolean and an annotation of any type. The validation boolean is ignored in drafts 2019-09 and 2020-12, because the [specification][0] says: > They do not function as validation assertions; a malformed string-encoded document MUST NOT cause the containing instance to be considered invalid. Drafts 7 and earlier will return a validation error based on the validation boolean. From the [specification][1]: > Implementations MAY support the "contentMediaType" and "contentEncoding" keywords as validation assertions. All drafts forward the returned annotation as an annotation in the overall result. I don't love the API here, since it requires returning an array even when the boolean is ignored in the latest drafts, but I couldn't come up with anything better. 
Closes: https://github.com/davishmcclurg/json_schemer/issues/137 [0]: https://json-schema.org/draft/2020-12/json-schema-validation#section-8.1 [1]: https://json-schema.org/draft-07/draft-handrews-json-schema-validation-01#rfc.section.8.2 --- README.md | 22 ++++++++ lib/json_schemer.rb | 11 ++++ lib/json_schemer/content.rb | 18 ++++++ lib/json_schemer/draft201909/meta.rb | 2 + lib/json_schemer/draft202012/meta.rb | 6 ++ lib/json_schemer/draft202012/vocab/content.rb | 12 +++- lib/json_schemer/draft4/meta.rb | 2 + lib/json_schemer/draft6/meta.rb | 2 + lib/json_schemer/draft7/meta.rb | 2 + lib/json_schemer/draft7/vocab/validation.rb | 8 +-- lib/json_schemer/format.rb | 26 --------- lib/json_schemer/openapi30/meta.rb | 2 +- lib/json_schemer/schema.rb | 26 ++++++++- test/json_schemer_test.rb | 56 ++++++++++++++++++- 14 files changed, 159 insertions(+), 36 deletions(-) create mode 100644 lib/json_schemer/content.rb diff --git a/README.md b/README.md index b119fa6a..2233a219 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,28 @@ JSONSchemer.schema( # default: true format: true, + # custom content encodings + # only `base64` is available by default + content_encodings: { + # return [success, annotation] tuple + 'urlsafe_base64' => proc do |instance| + [true, Base64.urlsafe_decode64(instance)] + rescue + [false, nil] + end + }, + + # custom content media types + # only `application/json` is available by default + content_media_types: { + # return [success, annotation] tuple + 'text/csv' => proc do |instance| + [true, CSV.parse(instance)] + rescue + [false, nil] + end + }, + # insert default property values during validation # true/false # default: false diff --git a/lib/json_schemer.rb b/lib/json_schemer.rb index ecce26d7..49ad1449 100644 --- a/lib/json_schemer.rb +++ b/lib/json_schemer.rb @@ -20,6 +20,7 @@ require 'json_schemer/format/uri_template' require 'json_schemer/format/email' require 'json_schemer/format' +require 'json_schemer/content' require 
'json_schemer/errors' require 'json_schemer/cached_resolver' require 'json_schemer/ecma_regexp' @@ -146,6 +147,8 @@ def draft202012 Draft202012::SCHEMA, :base_uri => Draft202012::BASE_URI, :formats => Draft202012::FORMATS, + :content_encodings => Draft202012::CONTENT_ENCODINGS, + :content_media_types => Draft202012::CONTENT_MEDIA_TYPES, :ref_resolver => Draft202012::Meta::SCHEMAS.to_proc, :regexp_resolver => 'ecma' ) @@ -156,6 +159,8 @@ def draft201909 Draft201909::SCHEMA, :base_uri => Draft201909::BASE_URI, :formats => Draft201909::FORMATS, + :content_encodings => Draft201909::CONTENT_ENCODINGS, + :content_media_types => Draft201909::CONTENT_MEDIA_TYPES, :ref_resolver => Draft201909::Meta::SCHEMAS.to_proc, :regexp_resolver => 'ecma' ) @@ -167,6 +172,8 @@ def draft7 :vocabulary => { 'json-schemer://draft7' => true }, :base_uri => Draft7::BASE_URI, :formats => Draft7::FORMATS, + :content_encodings => Draft7::CONTENT_ENCODINGS, + :content_media_types => Draft7::CONTENT_MEDIA_TYPES, :regexp_resolver => 'ecma' ) end @@ -177,6 +184,8 @@ def draft6 :vocabulary => { 'json-schemer://draft6' => true }, :base_uri => Draft6::BASE_URI, :formats => Draft6::FORMATS, + :content_encodings => Draft6::CONTENT_ENCODINGS, + :content_media_types => Draft6::CONTENT_MEDIA_TYPES, :regexp_resolver => 'ecma' ) end @@ -187,6 +196,8 @@ def draft4 :vocabulary => { 'json-schemer://draft4' => true }, :base_uri => Draft4::BASE_URI, :formats => Draft4::FORMATS, + :content_encodings => Draft4::CONTENT_ENCODINGS, + :content_media_types => Draft4::CONTENT_MEDIA_TYPES, :regexp_resolver => 'ecma' ) end diff --git a/lib/json_schemer/content.rb b/lib/json_schemer/content.rb new file mode 100644 index 00000000..40ac3282 --- /dev/null +++ b/lib/json_schemer/content.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true +module JSONSchemer + module ContentEncoding + BASE64 = proc do |instance| + [true, Base64.strict_decode64(instance)] + rescue + [false, nil] + end + end + + module ContentMediaType + JSON = 
proc do |instance| + [true, ::JSON.parse(instance)] + rescue + [false, nil] + end + end +end diff --git a/lib/json_schemer/draft201909/meta.rb b/lib/json_schemer/draft201909/meta.rb index daeced90..360113d3 100644 --- a/lib/json_schemer/draft201909/meta.rb +++ b/lib/json_schemer/draft201909/meta.rb @@ -3,6 +3,8 @@ module JSONSchemer module Draft201909 BASE_URI = URI('https://json-schema.org/draft/2019-09/schema') FORMATS = Draft202012::FORMATS + CONTENT_ENCODINGS = Draft202012::CONTENT_ENCODINGS + CONTENT_MEDIA_TYPES = Draft202012::CONTENT_MEDIA_TYPES SCHEMA = { '$schema' => 'https://json-schema.org/draft/2019-09/schema', '$id' => 'https://json-schema.org/draft/2019-09/schema', diff --git a/lib/json_schemer/draft202012/meta.rb b/lib/json_schemer/draft202012/meta.rb index 14e47c81..c0e02e7d 100644 --- a/lib/json_schemer/draft202012/meta.rb +++ b/lib/json_schemer/draft202012/meta.rb @@ -23,6 +23,12 @@ module Draft202012 'relative-json-pointer' => Format::RELATIVE_JSON_POINTER, 'regex' => Format::REGEX } + CONTENT_ENCODINGS = { + 'base64' => ContentEncoding::BASE64 + } + CONTENT_MEDIA_TYPES = { + 'application/json' => ContentMediaType::JSON + } SCHEMA = { '$schema' => 'https://json-schema.org/draft/2020-12/schema', '$id' => 'https://json-schema.org/draft/2020-12/schema', diff --git a/lib/json_schemer/draft202012/vocab/content.rb b/lib/json_schemer/draft202012/vocab/content.rb index abdefd95..8bfbbd75 100644 --- a/lib/json_schemer/draft202012/vocab/content.rb +++ b/lib/json_schemer/draft202012/vocab/content.rb @@ -4,21 +4,29 @@ module Draft202012 module Vocab module Content class ContentEncoding < Keyword + def parse + root.fetch_content_encoding(value) { raise UnknownContentEncoding, value } + end + def validate(instance, instance_location, keyword_location, _context) return result(instance, instance_location, keyword_location, true) unless instance.is_a?(String) - _valid, annotation = Format.decode_content_encoding(instance, value) + _valid, annotation = 
parsed.call(instance) result(instance, instance_location, keyword_location, true, :annotation => annotation) end end class ContentMediaType < Keyword + def parse + root.fetch_content_media_type(value) { raise UnknownContentMediaType, value } + end + def validate(instance, instance_location, keyword_location, context) return result(instance, instance_location, keyword_location, true) unless instance.is_a?(String) decoded_instance = context.adjacent_results[ContentEncoding]&.annotation || instance - _valid, annotation = Format.parse_content_media_type(decoded_instance, value) + _valid, annotation = parsed.call(decoded_instance) result(instance, instance_location, keyword_location, true, :annotation => annotation) end diff --git a/lib/json_schemer/draft4/meta.rb b/lib/json_schemer/draft4/meta.rb index 10fc977b..14d9c27b 100644 --- a/lib/json_schemer/draft4/meta.rb +++ b/lib/json_schemer/draft4/meta.rb @@ -6,6 +6,8 @@ module Draft4 FORMATS.delete('uri-reference') FORMATS.delete('uri-template') FORMATS.delete('json-pointer') + CONTENT_ENCODINGS = Draft6::CONTENT_ENCODINGS + CONTENT_MEDIA_TYPES = Draft6::CONTENT_MEDIA_TYPES SCHEMA = { 'id' => 'http://json-schema.org/draft-04/schema#', '$schema' => 'http://json-schema.org/draft-04/schema#', diff --git a/lib/json_schemer/draft6/meta.rb b/lib/json_schemer/draft6/meta.rb index 5fb958e0..502a0268 100644 --- a/lib/json_schemer/draft6/meta.rb +++ b/lib/json_schemer/draft6/meta.rb @@ -11,6 +11,8 @@ module Draft6 FORMATS.delete('iri-reference') FORMATS.delete('relative-json-pointer') FORMATS.delete('regex') + CONTENT_ENCODINGS = Draft7::CONTENT_ENCODINGS + CONTENT_MEDIA_TYPES = Draft7::CONTENT_MEDIA_TYPES SCHEMA = { '$schema' => 'http://json-schema.org/draft-06/schema#', '$id' => 'http://json-schema.org/draft-06/schema#', diff --git a/lib/json_schemer/draft7/meta.rb b/lib/json_schemer/draft7/meta.rb index 34396da9..832aa857 100644 --- a/lib/json_schemer/draft7/meta.rb +++ b/lib/json_schemer/draft7/meta.rb @@ -5,6 +5,8 @@ module 
Draft7 FORMATS = Draft201909::FORMATS.dup FORMATS.delete('duration') FORMATS.delete('uuid') + CONTENT_ENCODINGS = Draft201909::CONTENT_ENCODINGS + CONTENT_MEDIA_TYPES = Draft201909::CONTENT_MEDIA_TYPES SCHEMA = { '$schema' => 'http://json-schema.org/draft-07/schema#', '$id' => 'http://json-schema.org/draft-07/schema#', diff --git a/lib/json_schemer/draft7/vocab/validation.rb b/lib/json_schemer/draft7/vocab/validation.rb index c7df92ef..f38f0810 100644 --- a/lib/json_schemer/draft7/vocab/validation.rb +++ b/lib/json_schemer/draft7/vocab/validation.rb @@ -35,7 +35,7 @@ def validate(instance, instance_location, keyword_location, context) end end - class ContentEncoding < Keyword + class ContentEncoding < Draft202012::Vocab::Content::ContentEncoding def error(formatted_instance_location:, **) "string at #{formatted_instance_location} could not be decoded using encoding: #{value}" end @@ -43,13 +43,13 @@ def error(formatted_instance_location:, **) def validate(instance, instance_location, keyword_location, _context) return result(instance, instance_location, keyword_location, true) unless instance.is_a?(String) - valid, annotation = Format.decode_content_encoding(instance, value) + valid, annotation = parsed.call(instance) result(instance, instance_location, keyword_location, valid, :annotation => annotation) end end - class ContentMediaType < Keyword + class ContentMediaType < Draft202012::Vocab::Content::ContentMediaType def error(formatted_instance_location:, **) "string at #{formatted_instance_location} could not be parsed using media type: #{value}" end @@ -58,7 +58,7 @@ def validate(instance, instance_location, keyword_location, context) return result(instance, instance_location, keyword_location, true) unless instance.is_a?(String) decoded_instance = context.adjacent_results[ContentEncoding]&.annotation || instance - valid, annotation = Format.parse_content_media_type(decoded_instance, value) + valid, annotation = parsed.call(decoded_instance) result(instance, 
instance_location, keyword_location, valid, :annotation => annotation) end diff --git a/lib/json_schemer/format.rb b/lib/json_schemer/format.rb index c11e339e..fa96cc28 100644 --- a/lib/json_schemer/format.rb +++ b/lib/json_schemer/format.rb @@ -93,32 +93,6 @@ def percent_encode(data, regexp) data.force_encoding(Encoding::US_ASCII) end - def decode_content_encoding(data, content_encoding) - case content_encoding - when 'base64' - begin - [true, Base64.strict_decode64(data)] - rescue - [false, nil] - end - else - raise UnknownContentEncoding, content_encoding - end - end - - def parse_content_media_type(data, content_media_type) - case content_media_type - when 'application/json' - begin - [true, JSON.parse(data)] - rescue - [false, nil] - end - else - raise UnknownContentMediaType, content_media_type - end - end - def valid_date_time?(data) return false if HOUR_24_REGEX.match?(data) datetime = DateTime.rfc3339(data) diff --git a/lib/json_schemer/openapi30/meta.rb b/lib/json_schemer/openapi30/meta.rb index c5bcf478..4b62eb86 100644 --- a/lib/json_schemer/openapi30/meta.rb +++ b/lib/json_schemer/openapi30/meta.rb @@ -4,7 +4,7 @@ module OpenAPI30 BASE_URI = URI('json-schemer://openapi30/schema') # https://spec.openapis.org/oas/v3.0.3#data-types FORMATS = OpenAPI31::FORMATS.merge( - 'byte' => proc { |instance, _value| Format.decode_content_encoding(instance, 'base64').first }, + 'byte' => proc { |instance, _value| ContentEncoding::BASE64.call(instance).first }, 'binary' => proc { |instance, _value| instance.is_a?(String) && instance.encoding == Encoding::ASCII_8BIT }, 'date' => Format::DATE ) diff --git a/lib/json_schemer/schema.rb b/lib/json_schemer/schema.rb index 4b760fd0..1c2985bb 100644 --- a/lib/json_schemer/schema.rb +++ b/lib/json_schemer/schema.rb @@ -20,6 +20,8 @@ def original_instance(instance_location) PROPERTIES_KEYWORD_CLASS = Draft202012::Vocab::Applicator::Properties DEFAULT_BASE_URI = URI('json-schemer://schema').freeze DEFAULT_FORMATS = {}.freeze + 
DEFAULT_CONTENT_ENCODINGS = {}.freeze + DEFAULT_CONTENT_MEDIA_TYPES = {}.freeze DEFAULT_KEYWORDS = {}.freeze DEFAULT_BEFORE_PROPERTY_VALIDATION = [].freeze DEFAULT_AFTER_PROPERTY_VALIDATION = [].freeze @@ -41,7 +43,7 @@ def original_instance(instance_location) attr_accessor :base_uri, :meta_schema, :keywords, :keyword_order attr_reader :value, :parent, :root, :parsed - attr_reader :vocabulary, :format, :formats, :custom_keywords, :before_property_validation, :after_property_validation, :insert_property_defaults, :property_default_resolver + attr_reader :vocabulary, :format, :formats, :content_encodings, :content_media_types, :custom_keywords, :before_property_validation, :after_property_validation, :insert_property_defaults, :property_default_resolver def initialize( value, @@ -53,6 +55,8 @@ def initialize( vocabulary: nil, format: true, formats: DEFAULT_FORMATS, + content_encodings: DEFAULT_CONTENT_ENCODINGS, + content_media_types: DEFAULT_CONTENT_MEDIA_TYPES, keywords: DEFAULT_KEYWORDS, before_property_validation: DEFAULT_BEFORE_PROPERTY_VALIDATION, after_property_validation: DEFAULT_AFTER_PROPERTY_VALIDATION, @@ -74,6 +78,8 @@ def initialize( @vocabulary = vocabulary @format = format @formats = formats + @content_encodings = content_encodings + @content_media_types = content_media_types @custom_keywords = keywords @before_property_validation = Array(before_property_validation) @after_property_validation = Array(after_property_validation) @@ -182,6 +188,8 @@ def resolve_ref(uri) :meta_schema => meta_schema, :format => format, :formats => formats, + :content_encodings => content_encodings, + :content_media_types => content_media_types, :keywords => custom_keywords, :before_property_validation => before_property_validation, :after_property_validation => after_property_validation, @@ -295,6 +303,22 @@ def fetch_format(format, *args, &block) end end + def fetch_content_encoding(content_encoding, *args, &block) + if meta_schema == self + 
content_encodings.fetch(content_encoding, *args, &block) + else + content_encodings.fetch(content_encoding) { meta_schema.fetch_content_encoding(content_encoding, *args, &block) } + end + end + + def fetch_content_media_type(content_media_type, *args, &block) + if meta_schema == self + content_media_types.fetch(content_media_type, *args, &block) + else + content_media_types.fetch(content_media_type) { meta_schema.fetch_content_media_type(content_media_type, *args, &block) } + end + end + def id_keyword @id_keyword ||= (keywords.key?('$id') ? '$id' : 'id') end diff --git a/test/json_schemer_test.rb b/test/json_schemer_test.rb index bfa1b012..346ba11c 100644 --- a/test/json_schemer_test.rb +++ b/test/json_schemer_test.rb @@ -1,4 +1,5 @@ require 'test_helper' +require 'csv' class JSONSchemerTest < Minitest::Test def test_that_it_has_a_version_number @@ -237,11 +238,11 @@ def test_it_ignores_invalid_types end def test_it_raises_for_unsupported_content_encoding - assert_raises(JSONSchemer::UnknownContentEncoding) { JSONSchemer.schema({ 'contentEncoding' => '7bit' }).valid?('') } + assert_raises(JSONSchemer::UnknownContentEncoding) { JSONSchemer.schema({ 'contentEncoding' => '7bit' }) } end def test_it_raises_for_unsupported_content_media_type - assert_raises(JSONSchemer::UnknownContentMediaType) { JSONSchemer.schema({ 'contentMediaType' => 'application/xml' }).valid?('') } + assert_raises(JSONSchemer::UnknownContentMediaType) { JSONSchemer.schema({ 'contentMediaType' => 'application/xml' }) } end def test_it_raises_for_required_unknown_vocabulary @@ -547,4 +548,55 @@ def test_bundle_exclusive_ref assert(bundle.valid?('yah')) refute(bundle.valid?('nah')) end + + def test_custom_content_encodings_and_media_types + data = '😊' + instance = Base64.urlsafe_encode64(data) + schema = { + 'contentEncoding' => 'urlsafe_base64', + 'contentMediaType' => 'text/csv' + } + content_encodings = { + 'urlsafe_base64' => proc do |instance| + [true, 
Base64.urlsafe_decode64(instance).force_encoding('utf-8')] + rescue + [false, nil] + end + } + content_media_types = { + 'text/csv' => proc do |instance| + [true, CSV.parse(instance)] + rescue + [false, nil] + end + } + + refute(JSONSchemer.schema({ 'contentEncoding' => 'base64' }).validate(instance, :output_format => 'basic').fetch('annotations').first.key?('annotation')) + + schemer = JSONSchemer.schema(schema, :content_encodings => content_encodings, :content_media_types => content_media_types) + + assert_nil(annotation(schemer.validate('invalid', :output_format => 'basic'), '/contentEncoding')) + assert_nil(annotation(schemer.validate(Base64.urlsafe_encode64("#{data}\""), :output_format => 'basic'), '/contentMediaType')) + + result = schemer.validate(instance, :output_format => 'basic') + assert_equal(data, annotation(result, '/contentEncoding')) + assert_equal([[data]], annotation(result, '/contentMediaType')) + + draft7_schemer = JSONSchemer.schema( + schema, + :meta_schema => JSONSchemer.draft7, + :content_encodings => content_encodings, + :content_media_types => content_media_types + ) + + assert(draft7_schemer.valid?(instance)) + refute(draft7_schemer.valid?('invalid')) + refute(draft7_schemer.valid?(Base64.urlsafe_encode64("#{data}\""))) + end + +private + + def annotation(result, keyword_location) + result.fetch('annotations').find { |annotation| annotation.fetch('keywordLocation') == keyword_location }['annotation'] + end end