Skip to content

Commit

Permalink
Support custom content encodings and media types
Browse files Browse the repository at this point in the history
This exposes hooks for the `contentEncoding` and `contentMediaType`
keywords, similar to the existing custom formats behavior. Each provided
callable must return a two-element array, `[valid, annotation]`: a
validation boolean followed by an annotation of any type. The validation
boolean is ignored in drafts 2019-09 and 2020-12, because the
[specification][0] says:

> They do not function as validation assertions; a malformed string-encoded document MUST NOT cause the containing instance to be considered invalid.

Drafts 7 and earlier will return a validation error when the returned
validation boolean is false. From the [specification][1]:

> Implementations MAY support the "contentMediaType" and "contentEncoding" keywords as validation assertions.

All drafts forward the returned annotation as an annotation in the
overall result.

I don't love the API here, since it requires returning a
`[valid, annotation]` array even though the validation boolean is ignored
in the latest drafts, but I couldn't come up with anything better.

Closes: #137

[0]: https://json-schema.org/draft/2020-12/json-schema-validation#section-8.1
[1]: https://json-schema.org/draft-07/draft-handrews-json-schema-validation-01#rfc.section.8.2
  • Loading branch information
davishmcclurg committed Oct 24, 2023
1 parent 69fe7a8 commit febaf26
Show file tree
Hide file tree
Showing 14 changed files with 159 additions and 36 deletions.
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,28 @@ JSONSchemer.schema(
# default: true
format: true,

# custom content encodings
# only `base64` is available by default
content_encodings: {
# return [success, annotation] tuple
'urlsafe_base64' => proc do |instance|
[true, Base64.urlsafe_decode64(instance)]
rescue
[false, nil]
end
},

# custom content media types
# only `application/json` is available by default
content_media_types: {
# return [success, annotation] tuple
'text/csv' => proc do |instance|
[true, CSV.parse(instance)]
rescue
[false, nil]
end
},

# insert default property values during validation
# true/false
# default: false
Expand Down
11 changes: 11 additions & 0 deletions lib/json_schemer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
require 'json_schemer/format/uri_template'
require 'json_schemer/format/email'
require 'json_schemer/format'
require 'json_schemer/content'
require 'json_schemer/errors'
require 'json_schemer/cached_resolver'
require 'json_schemer/ecma_regexp'
Expand Down Expand Up @@ -146,6 +147,8 @@ def draft202012
Draft202012::SCHEMA,
:base_uri => Draft202012::BASE_URI,
:formats => Draft202012::FORMATS,
:content_encodings => Draft202012::CONTENT_ENCODINGS,
:content_media_types => Draft202012::CONTENT_MEDIA_TYPES,
:ref_resolver => Draft202012::Meta::SCHEMAS.to_proc,
:regexp_resolver => 'ecma'
)
Expand All @@ -156,6 +159,8 @@ def draft201909
Draft201909::SCHEMA,
:base_uri => Draft201909::BASE_URI,
:formats => Draft201909::FORMATS,
:content_encodings => Draft201909::CONTENT_ENCODINGS,
:content_media_types => Draft201909::CONTENT_MEDIA_TYPES,
:ref_resolver => Draft201909::Meta::SCHEMAS.to_proc,
:regexp_resolver => 'ecma'
)
Expand All @@ -167,6 +172,8 @@ def draft7
:vocabulary => { 'json-schemer://draft7' => true },
:base_uri => Draft7::BASE_URI,
:formats => Draft7::FORMATS,
:content_encodings => Draft7::CONTENT_ENCODINGS,
:content_media_types => Draft7::CONTENT_MEDIA_TYPES,
:regexp_resolver => 'ecma'
)
end
Expand All @@ -177,6 +184,8 @@ def draft6
:vocabulary => { 'json-schemer://draft6' => true },
:base_uri => Draft6::BASE_URI,
:formats => Draft6::FORMATS,
:content_encodings => Draft6::CONTENT_ENCODINGS,
:content_media_types => Draft6::CONTENT_MEDIA_TYPES,
:regexp_resolver => 'ecma'
)
end
Expand All @@ -187,6 +196,8 @@ def draft4
:vocabulary => { 'json-schemer://draft4' => true },
:base_uri => Draft4::BASE_URI,
:formats => Draft4::FORMATS,
:content_encodings => Draft4::CONTENT_ENCODINGS,
:content_media_types => Draft4::CONTENT_MEDIA_TYPES,
:regexp_resolver => 'ecma'
)
end
Expand Down
18 changes: 18 additions & 0 deletions lib/json_schemer/content.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true
module JSONSchemer
  # Built-in callables for the `contentEncoding` keyword.
  module ContentEncoding
    # Strictly decodes a base64 string.
    #
    # Returns `[true, decoded]` when decoding succeeds, or `[false, nil]`
    # when decoding raises any StandardError.
    BASE64 = proc do |instance|
      begin
        decoded = Base64.strict_decode64(instance)
        [true, decoded]
      rescue
        [false, nil]
      end
    end
  end

  # Built-in callables for the `contentMediaType` keyword.
  module ContentMediaType
    # Parses a JSON document.
    #
    # Returns `[true, parsed]` when parsing succeeds, or `[false, nil]`
    # when parsing raises any StandardError.
    #
    # The leading `::` is required: inside this module, a bare `JSON`
    # resolves to the constant being defined here, not the JSON library.
    JSON = proc do |instance|
      begin
        document = ::JSON.parse(instance)
        [true, document]
      rescue
        [false, nil]
      end
    end
  end
end
2 changes: 2 additions & 0 deletions lib/json_schemer/draft201909/meta.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ module JSONSchemer
module Draft201909
BASE_URI = URI('https://json-schema.org/draft/2019-09/schema')
FORMATS = Draft202012::FORMATS
CONTENT_ENCODINGS = Draft202012::CONTENT_ENCODINGS
CONTENT_MEDIA_TYPES = Draft202012::CONTENT_MEDIA_TYPES
SCHEMA = {
'$schema' => 'https://json-schema.org/draft/2019-09/schema',
'$id' => 'https://json-schema.org/draft/2019-09/schema',
Expand Down
6 changes: 6 additions & 0 deletions lib/json_schemer/draft202012/meta.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ module Draft202012
'relative-json-pointer' => Format::RELATIVE_JSON_POINTER,
'regex' => Format::REGEX
}
CONTENT_ENCODINGS = {
'base64' => ContentEncoding::BASE64
}
CONTENT_MEDIA_TYPES = {
'application/json' => ContentMediaType::JSON
}
SCHEMA = {
'$schema' => 'https://json-schema.org/draft/2020-12/schema',
'$id' => 'https://json-schema.org/draft/2020-12/schema',
Expand Down
12 changes: 10 additions & 2 deletions lib/json_schemer/draft202012/vocab/content.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,29 @@ module Draft202012
module Vocab
module Content
class ContentEncoding < Keyword
def parse
root.fetch_content_encoding(value) { raise UnknownContentEncoding, value }
end

def validate(instance, instance_location, keyword_location, _context)
return result(instance, instance_location, keyword_location, true) unless instance.is_a?(String)

_valid, annotation = Format.decode_content_encoding(instance, value)
_valid, annotation = parsed.call(instance)

result(instance, instance_location, keyword_location, true, :annotation => annotation)
end
end

class ContentMediaType < Keyword
def parse
root.fetch_content_media_type(value) { raise UnknownContentMediaType, value }
end

def validate(instance, instance_location, keyword_location, context)
return result(instance, instance_location, keyword_location, true) unless instance.is_a?(String)

decoded_instance = context.adjacent_results[ContentEncoding]&.annotation || instance
_valid, annotation = Format.parse_content_media_type(decoded_instance, value)
_valid, annotation = parsed.call(decoded_instance)

result(instance, instance_location, keyword_location, true, :annotation => annotation)
end
Expand Down
2 changes: 2 additions & 0 deletions lib/json_schemer/draft4/meta.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ module Draft4
FORMATS.delete('uri-reference')
FORMATS.delete('uri-template')
FORMATS.delete('json-pointer')
CONTENT_ENCODINGS = Draft6::CONTENT_ENCODINGS
CONTENT_MEDIA_TYPES = Draft6::CONTENT_MEDIA_TYPES
SCHEMA = {
'id' => 'http://json-schema.org/draft-04/schema#',
'$schema' => 'http://json-schema.org/draft-04/schema#',
Expand Down
2 changes: 2 additions & 0 deletions lib/json_schemer/draft6/meta.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ module Draft6
FORMATS.delete('iri-reference')
FORMATS.delete('relative-json-pointer')
FORMATS.delete('regex')
CONTENT_ENCODINGS = Draft7::CONTENT_ENCODINGS
CONTENT_MEDIA_TYPES = Draft7::CONTENT_MEDIA_TYPES
SCHEMA = {
'$schema' => 'http://json-schema.org/draft-06/schema#',
'$id' => 'http://json-schema.org/draft-06/schema#',
Expand Down
2 changes: 2 additions & 0 deletions lib/json_schemer/draft7/meta.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ module Draft7
FORMATS = Draft201909::FORMATS.dup
FORMATS.delete('duration')
FORMATS.delete('uuid')
CONTENT_ENCODINGS = Draft201909::CONTENT_ENCODINGS
CONTENT_MEDIA_TYPES = Draft201909::CONTENT_MEDIA_TYPES
SCHEMA = {
'$schema' => 'http://json-schema.org/draft-07/schema#',
'$id' => 'http://json-schema.org/draft-07/schema#',
Expand Down
8 changes: 4 additions & 4 deletions lib/json_schemer/draft7/vocab/validation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,21 @@ def validate(instance, instance_location, keyword_location, context)
end
end

class ContentEncoding < Keyword
class ContentEncoding < Draft202012::Vocab::Content::ContentEncoding
def error(formatted_instance_location:, **)
"string at #{formatted_instance_location} could not be decoded using encoding: #{value}"
end

def validate(instance, instance_location, keyword_location, _context)
return result(instance, instance_location, keyword_location, true) unless instance.is_a?(String)

valid, annotation = Format.decode_content_encoding(instance, value)
valid, annotation = parsed.call(instance)

result(instance, instance_location, keyword_location, valid, :annotation => annotation)
end
end

class ContentMediaType < Keyword
class ContentMediaType < Draft202012::Vocab::Content::ContentMediaType
def error(formatted_instance_location:, **)
"string at #{formatted_instance_location} could not be parsed using media type: #{value}"
end
Expand All @@ -58,7 +58,7 @@ def validate(instance, instance_location, keyword_location, context)
return result(instance, instance_location, keyword_location, true) unless instance.is_a?(String)

decoded_instance = context.adjacent_results[ContentEncoding]&.annotation || instance
valid, annotation = Format.parse_content_media_type(decoded_instance, value)
valid, annotation = parsed.call(decoded_instance)

result(instance, instance_location, keyword_location, valid, :annotation => annotation)
end
Expand Down
26 changes: 0 additions & 26 deletions lib/json_schemer/format.rb
Original file line number Diff line number Diff line change
Expand Up @@ -93,32 +93,6 @@ def percent_encode(data, regexp)
data.force_encoding(Encoding::US_ASCII)
end

def decode_content_encoding(data, content_encoding)
case content_encoding
when 'base64'
begin
[true, Base64.strict_decode64(data)]
rescue
[false, nil]
end
else
raise UnknownContentEncoding, content_encoding
end
end

def parse_content_media_type(data, content_media_type)
case content_media_type
when 'application/json'
begin
[true, JSON.parse(data)]
rescue
[false, nil]
end
else
raise UnknownContentMediaType, content_media_type
end
end

def valid_date_time?(data)
return false if HOUR_24_REGEX.match?(data)
datetime = DateTime.rfc3339(data)
Expand Down
2 changes: 1 addition & 1 deletion lib/json_schemer/openapi30/meta.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ module OpenAPI30
BASE_URI = URI('json-schemer://openapi30/schema')
# https://spec.openapis.org/oas/v3.0.3#data-types
FORMATS = OpenAPI31::FORMATS.merge(
'byte' => proc { |instance, _value| Format.decode_content_encoding(instance, 'base64').first },
'byte' => proc { |instance, _value| ContentEncoding::BASE64.call(instance).first },
'binary' => proc { |instance, _value| instance.is_a?(String) && instance.encoding == Encoding::ASCII_8BIT },
'date' => Format::DATE
)
Expand Down
26 changes: 25 additions & 1 deletion lib/json_schemer/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def original_instance(instance_location)
PROPERTIES_KEYWORD_CLASS = Draft202012::Vocab::Applicator::Properties
DEFAULT_BASE_URI = URI('json-schemer://schema').freeze
DEFAULT_FORMATS = {}.freeze
DEFAULT_CONTENT_ENCODINGS = {}.freeze
DEFAULT_CONTENT_MEDIA_TYPES = {}.freeze
DEFAULT_KEYWORDS = {}.freeze
DEFAULT_BEFORE_PROPERTY_VALIDATION = [].freeze
DEFAULT_AFTER_PROPERTY_VALIDATION = [].freeze
Expand All @@ -41,7 +43,7 @@ def original_instance(instance_location)

attr_accessor :base_uri, :meta_schema, :keywords, :keyword_order
attr_reader :value, :parent, :root, :parsed
attr_reader :vocabulary, :format, :formats, :custom_keywords, :before_property_validation, :after_property_validation, :insert_property_defaults, :property_default_resolver
attr_reader :vocabulary, :format, :formats, :content_encodings, :content_media_types, :custom_keywords, :before_property_validation, :after_property_validation, :insert_property_defaults, :property_default_resolver

def initialize(
value,
Expand All @@ -53,6 +55,8 @@ def initialize(
vocabulary: nil,
format: true,
formats: DEFAULT_FORMATS,
content_encodings: DEFAULT_CONTENT_ENCODINGS,
content_media_types: DEFAULT_CONTENT_MEDIA_TYPES,
keywords: DEFAULT_KEYWORDS,
before_property_validation: DEFAULT_BEFORE_PROPERTY_VALIDATION,
after_property_validation: DEFAULT_AFTER_PROPERTY_VALIDATION,
Expand All @@ -74,6 +78,8 @@ def initialize(
@vocabulary = vocabulary
@format = format
@formats = formats
@content_encodings = content_encodings
@content_media_types = content_media_types
@custom_keywords = keywords
@before_property_validation = Array(before_property_validation)
@after_property_validation = Array(after_property_validation)
Expand Down Expand Up @@ -182,6 +188,8 @@ def resolve_ref(uri)
:meta_schema => meta_schema,
:format => format,
:formats => formats,
:content_encodings => content_encodings,
:content_media_types => content_media_types,
:keywords => custom_keywords,
:before_property_validation => before_property_validation,
:after_property_validation => after_property_validation,
Expand Down Expand Up @@ -295,6 +303,22 @@ def fetch_format(format, *args, &block)
end
end

# Looks up the callable registered for +content_encoding+.
#
# When this schema is its own meta schema, the lookup is a plain
# Hash#fetch on +content_encodings+, with any default argument or block
# forwarded as given. Otherwise this schema's own +content_encodings+
# are consulted first, and a miss is delegated to the meta schema's
# +fetch_content_encoding+ with the same default argument and block.
def fetch_content_encoding(content_encoding, *args, &block)
  return content_encodings.fetch(content_encoding, *args, &block) if meta_schema == self

  content_encodings.fetch(content_encoding) do
    meta_schema.fetch_content_encoding(content_encoding, *args, &block)
  end
end

# Looks up the callable registered for +content_media_type+.
#
# When this schema is its own meta schema, the lookup is a plain
# Hash#fetch on +content_media_types+, with any default argument or block
# forwarded as given. Otherwise this schema's own +content_media_types+
# are consulted first, and a miss is delegated to the meta schema's
# +fetch_content_media_type+ with the same default argument and block.
def fetch_content_media_type(content_media_type, *args, &block)
  return content_media_types.fetch(content_media_type, *args, &block) if meta_schema == self

  content_media_types.fetch(content_media_type) do
    meta_schema.fetch_content_media_type(content_media_type, *args, &block)
  end
end

def id_keyword
@id_keyword ||= (keywords.key?('$id') ? '$id' : 'id')
end
Expand Down
Loading

0 comments on commit febaf26

Please sign in to comment.