Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support nested typeDSL #711

Merged
merged 9 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion schema_salad/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ def codegen(
else ".".join(list(reversed(sp.netloc.split("."))) + sp.path.strip("/").split("/"))
)
info = parser_info or pkg
salad_version = schema_metadata.get("saladVersion", "v1.1")

if lang in set(["python", "cpp", "dlang"]):
if target:
dest: Union[TextIOWrapper, TextIO] = open(target, mode="w", encoding="utf-8")
Expand All @@ -83,7 +85,9 @@ def codegen(
)
gen.parse(j)
return
gen = PythonCodeGen(dest, copyright=copyright, parser_info=info)
gen = PythonCodeGen(
dest, copyright=copyright, parser_info=info, salad_version=salad_version
)

elif lang == "java":
gen = JavaCodeGen(
Expand Down
5 changes: 4 additions & 1 deletion schema_salad/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,10 @@ def main(argsl: Optional[List[str]] = None) -> int:
raise ValidationException(f"Expected a CommentedSeq, got {type(schema_doc)}: {schema_doc}.")

# Create the loader that will be used to load the target document.
document_loader = Loader(schema_ctx, skip_schemas=args.skip_schemas)
schema_version = schema_metadata.get("saladVersion", None)
document_loader = Loader(
schema_ctx, skip_schemas=args.skip_schemas, salad_version=schema_version
)

if args.codegen:
codegen.codegen(
Expand Down
68 changes: 37 additions & 31 deletions schema_salad/metaschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import logging
import os
import pathlib
import re
import tempfile
import uuid as _uuid__ # pylint: disable=unused-import # noqa: F401
import xml.sax # nosec
Expand Down Expand Up @@ -574,41 +573,47 @@ def load(self, doc, baseuri, loadingOptions, docRoot=None):


class _TypeDSLLoader(_Loader):
typeDSLregex = re.compile(r"^([^[?]+)(\[\])?(\?)?$")

def __init__(self, inner, refScope):
# type: (_Loader, Union[int, None]) -> None
def __init__(self, inner, refScope, salad_version):
# type: (_Loader, Union[int, None], str) -> None
self.inner = inner
self.refScope = refScope
self.salad_version = salad_version

def resolve(
self,
doc, # type: str
baseuri, # type: str
loadingOptions, # type: LoadingOptions
):
# type: (...) -> Union[List[Union[Dict[str, str], str]], Dict[str, str], str]
m = self.typeDSLregex.match(doc)
if m:
group1 = m.group(1)
assert group1 is not None # nosec
first = expand_url(group1, baseuri, loadingOptions, False, True, self.refScope)
second = third = None
if bool(m.group(2)):
second = {"type": "array", "items": first}
# second = CommentedMap((("type", "array"),
# ("items", first)))
# second.lc.add_kv_line_col("type", lc)
# second.lc.add_kv_line_col("items", lc)
# second.lc.filename = filename
if bool(m.group(3)):
third = ["null", second or first]
# third = CommentedSeq(["null", second or first])
# third.lc.add_kv_line_col(0, lc)
# third.lc.add_kv_line_col(1, lc)
# third.lc.filename = filename
return third or second or first
return doc
# type: (...) -> Union[List[Union[Dict[str, Any], str]], Dict[str, Any], str]
doc_ = doc
optional = False
if doc_.endswith("?"):
optional = True
doc_ = doc_[0:-1]

if doc_.endswith("[]"):
salad_versions = [int(v) for v in self.salad_version[1:].split(".")]
items = "" # type: Union[List[Union[Dict[str, Any], str]], Dict[str, Any], str]
rest = doc_[0:-2]
if salad_versions < [1, 3]:
if rest.endswith("[]"):
# To show the error message with the original type
return doc
else:
items = expand_url(rest, baseuri, loadingOptions, False, True, self.refScope)
else:
items = self.resolve(rest, baseuri, loadingOptions)
if isinstance(items, str):
items = expand_url(items, baseuri, loadingOptions, False, True, self.refScope)
expanded = {"type": "array", "items": items} # type: Union[Dict[str, Any], str]
else:
expanded = expand_url(doc_, baseuri, loadingOptions, False, True, self.refScope)

if optional:
return ["null", expanded]
else:
return expanded

def load(self, doc, baseuri, loadingOptions, docRoot=None):
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
Expand Down Expand Up @@ -3576,6 +3581,7 @@ def save(
typedsl_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_strtype_or_array_of_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_strtype_2 = _TypeDSLLoader(
union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_strtype_or_array_of_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_strtype,
2,
"v1.1",
)
array_of_RecordFieldLoader = _ArrayLoader(RecordFieldLoader)
union_of_None_type_or_array_of_RecordFieldLoader = _UnionLoader(
Expand All @@ -3588,7 +3594,7 @@ def save(
union_of_None_type_or_array_of_RecordFieldLoader, "name", "type"
)
Record_nameLoader = _EnumLoader(("record",), "Record_name")
typedsl_Record_nameLoader_2 = _TypeDSLLoader(Record_nameLoader, 2)
typedsl_Record_nameLoader_2 = _TypeDSLLoader(Record_nameLoader, 2, "v1.1")
union_of_None_type_or_strtype = _UnionLoader(
(
None_type,
Expand All @@ -3600,15 +3606,15 @@ def save(
)
uri_array_of_strtype_True_False_None = _URILoader(array_of_strtype, True, False, None)
Enum_nameLoader = _EnumLoader(("enum",), "Enum_name")
typedsl_Enum_nameLoader_2 = _TypeDSLLoader(Enum_nameLoader, 2)
typedsl_Enum_nameLoader_2 = _TypeDSLLoader(Enum_nameLoader, 2, "v1.1")
uri_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_strtype_or_array_of_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_strtype_False_True_2 = _URILoader(
union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_strtype_or_array_of_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_strtype,
False,
True,
2,
)
Array_nameLoader = _EnumLoader(("array",), "Array_name")
typedsl_Array_nameLoader_2 = _TypeDSLLoader(Array_nameLoader, 2)
typedsl_Array_nameLoader_2 = _TypeDSLLoader(Array_nameLoader, 2, "v1.1")
union_of_None_type_or_booltype = _UnionLoader(
(
None_type,
Expand Down Expand Up @@ -3665,7 +3671,7 @@ def save(
union_of_None_type_or_array_of_SpecializeDefLoader, "specializeFrom", "specializeTo"
)
Documentation_nameLoader = _EnumLoader(("documentation",), "Documentation_name")
typedsl_Documentation_nameLoader_2 = _TypeDSLLoader(Documentation_nameLoader, 2)
typedsl_Documentation_nameLoader_2 = _TypeDSLLoader(Documentation_nameLoader, 2, "v1.1")
union_of_SaladRecordSchemaLoader_or_SaladEnumSchemaLoader_or_DocumentationLoader = (
_UnionLoader(
(
Expand Down
7 changes: 6 additions & 1 deletion schema_salad/metaschema/typedsl_res.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,14 @@

* If the type ends with a question mark `?`, the question mark is stripped off and the type is expanded to a union with `null`
* If the type ends with square brackets `[]` it is expanded to an array with items of the preceding type symbol
* The type may end with both `[]?` to indicate it is an optional array.
* The type may end with square brackets followed by a question mark (`[]?`) to indicate it is an optional array.
* Identifier resolution is applied after type DSL expansion.

Starting with Schema Salad version 1.3, fields tagged with `typeDSL: true` in `jsonldPredicate` have the following additional behavior:

* Square brackets `[]` can be repeated to indicate 2, 3, or more dimensional array types.
* These multi-dimensional arrays, like 1-dimensional arrays, can be combined with `?` (for example, `[][]?`) to indicate that it is an optional multi-dimensional array.

### Type DSL example

Given the following schema:
Expand Down
4 changes: 3 additions & 1 deletion schema_salad/python_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def __init__(
out: IO[str],
copyright: Optional[str],
parser_info: str,
salad_version: str,
) -> None:
super().__init__()
self.out = out
Expand All @@ -90,6 +91,7 @@ def __init__(
self.idfield = ""
self.copyright = copyright
self.parser_info = parser_info
self.salad_version = salad_version

@staticmethod
def safe_name(name: str) -> str:
Expand Down Expand Up @@ -629,7 +631,7 @@ def typedsl_loader(self, inner: TypeDef, ref_scope: Optional[int]) -> TypeDef:
return self.declare_type(
TypeDef(
f"typedsl_{self.safe_name(inner.name)}_{ref_scope}",
f"_TypeDSLLoader({self.safe_name(inner.name)}, {ref_scope})",
f"_TypeDSLLoader({self.safe_name(inner.name)}, {ref_scope}, '{self.salad_version}')",
)
)

Expand Down
59 changes: 32 additions & 27 deletions schema_salad/python_codegen_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import logging
import os
import pathlib
import re
import tempfile
import uuid as _uuid__ # pylint: disable=unused-import # noqa: F401
import xml.sax # nosec
Expand Down Expand Up @@ -571,41 +570,47 @@ def load(self, doc, baseuri, loadingOptions, docRoot=None):


class _TypeDSLLoader(_Loader):
typeDSLregex = re.compile(r"^([^[?]+)(\[\])?(\?)?$")

def __init__(self, inner, refScope):
# type: (_Loader, Union[int, None]) -> None
def __init__(self, inner, refScope, salad_version):
# type: (_Loader, Union[int, None], str) -> None
self.inner = inner
self.refScope = refScope
self.salad_version = salad_version

def resolve(
self,
doc, # type: str
baseuri, # type: str
loadingOptions, # type: LoadingOptions
):
# type: (...) -> Union[List[Union[Dict[str, str], str]], Dict[str, str], str]
m = self.typeDSLregex.match(doc)
if m:
group1 = m.group(1)
assert group1 is not None # nosec
first = expand_url(group1, baseuri, loadingOptions, False, True, self.refScope)
second = third = None
if bool(m.group(2)):
second = {"type": "array", "items": first}
# second = CommentedMap((("type", "array"),
# ("items", first)))
# second.lc.add_kv_line_col("type", lc)
# second.lc.add_kv_line_col("items", lc)
# second.lc.filename = filename
if bool(m.group(3)):
third = ["null", second or first]
# third = CommentedSeq(["null", second or first])
# third.lc.add_kv_line_col(0, lc)
# third.lc.add_kv_line_col(1, lc)
# third.lc.filename = filename
return third or second or first
return doc
# type: (...) -> Union[List[Union[Dict[str, Any], str]], Dict[str, Any], str]
doc_ = doc
optional = False
if doc_.endswith("?"):
optional = True
doc_ = doc_[0:-1]

if doc_.endswith("[]"):
salad_versions = [int(v) for v in self.salad_version[1:].split(".")]
items = "" # type: Union[List[Union[Dict[str, Any], str]], Dict[str, Any], str]
rest = doc_[0:-2]
if salad_versions < [1, 3]:
if rest.endswith("[]"):
# To show the error message with the original type
return doc
else:
items = expand_url(rest, baseuri, loadingOptions, False, True, self.refScope)
else:
items = self.resolve(rest, baseuri, loadingOptions)
if isinstance(items, str):
items = expand_url(items, baseuri, loadingOptions, False, True, self.refScope)
expanded = {"type": "array", "items": items} # type: Union[Dict[str, Any], str]
else:
expanded = expand_url(doc_, baseuri, loadingOptions, False, True, self.refScope)

if optional:
return ["null", expanded]
else:
return expanded

def load(self, doc, baseuri, loadingOptions, docRoot=None):
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
Expand Down
60 changes: 42 additions & 18 deletions schema_salad/ref_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
)

_logger = logging.getLogger("salad")
typeDSLregex = re.compile(r"^([^[?]+)(\[\])?(\?)?$")


def file_uri(path: str, split_frag: bool = False) -> str:
Expand Down Expand Up @@ -141,6 +140,7 @@ def SubLoader(loader: "Loader") -> "Loader":
url_fields=loader.url_fields,
allow_attachments=loader.allow_attachments,
session=loader.session,
salad_version=loader.salad_version,
)


Expand All @@ -158,6 +158,7 @@ def __init__(
url_fields: Optional[Set[str]] = None,
allow_attachments: Optional[AttachmentsType] = None,
doc_cache: Union[str, bool] = True,
salad_version: Optional[str] = None,
) -> None:
self.idx: IdxType = (
NormDict(lambda url: urllib.parse.urlsplit(url).geturl()) if idx is None else idx
Expand Down Expand Up @@ -207,6 +208,11 @@ def __init__(
self.secondaryFile_dsl_fields: Set[str] = set()
self.allow_attachments = allow_attachments

if salad_version:
self.salad_version = salad_version
else:
self.salad_version = "v1.1"

self.add_context(ctx)

def expand_url(
Expand Down Expand Up @@ -631,23 +637,41 @@ def _type_dsl(
if not isinstance(t, str):
return t

m = typeDSLregex.match(t)
if not m:
return t
first = m.group(1)
assert first # nosec
second = third = None
if bool(m.group(2)):
second = CommentedMap((("type", "array"), ("items", first)))
second.lc.add_kv_line_col("type", lc)
second.lc.add_kv_line_col("items", lc)
second.lc.filename = filename
if bool(m.group(3)):
third = CommentedSeq(["null", second or first])
third.lc.add_kv_line_col(0, lc)
third.lc.add_kv_line_col(1, lc)
third.lc.filename = filename
return third or second or first
t_ = t
optional = False
if t_.endswith("?"):
optional = True
t_ = t_[0:-1]

if t_.endswith("[]"):
salad_versions = [int(v) for v in self.salad_version[1:].split(".")]
rest = t_[0:-2]
if salad_versions < [1, 3]:
if rest.endswith("[]"):
# To show the error message with the original type
return t
else:
cmap = CommentedMap((("type", "array"), ("items", rest)))
else:
items = self._type_dsl(rest, lc, filename)
cmap = CommentedMap((("type", "array"), ("items", items)))
cmap.lc.add_kv_line_col("type", lc)
cmap.lc.add_kv_line_col("items", lc)
cmap.lc.filename = filename
expanded: Union[str, CommentedMap, CommentedSeq] = cmap
else:
expanded = t_

if optional:
cs = CommentedSeq(["null", expanded])
cs.lc.add_kv_line_col(0, lc)
cs.lc.add_kv_line_col(1, lc)
cs.lc.filename = filename
ret: Union[str, CommentedMap, CommentedSeq] = cs
else:
ret = expanded

return ret

def _secondaryFile_dsl(
self,
Expand Down
3 changes: 2 additions & 1 deletion schema_salad/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,8 @@ def get_metaschema() -> Tuple[Names, List[Dict[str, str]], Loader]:
},
"typeDSL": saladp + "JsonldPredicate/typeDSL",
"xsd": "http://www.w3.org/2001/XMLSchema#",
}
},
salad_version="v1.3",
mr-c marked this conversation as resolved.
Show resolved Hide resolved
)

for salad in SALAD_FILES:
Expand Down
2 changes: 1 addition & 1 deletion schema_salad/tests/metaschema-pre.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"```\n\nThis becomes:\n\n```\n",
"{\n \"mapped\": [\n {\n \"value\": \"daphne\",\n \"key\": \"fred\"\n },\n {\n \"value\": \"scooby\",\n \"key\": \"shaggy\"\n }\n ]\n}\n",
"```\n",
"## Domain Specific Language for types\n\nFields may be tagged `typeDSL: true` in `jsonldPredicate`. If so, the field is expanded using the\nfollowing micro-DSL for schema salad types:\n\n* If the type ends with a question mark `?`, the question mark is stripped off and the type is expanded to a union with `null`\n* If the type ends with square brackets `[]` it is expanded to an array with items of the preceding type symbol\n* The type may end with both `[]?` to indicate it is an optional array.\n* Identifier resolution is applied after type DSL expansion.\n\n### Type DSL example\n\nGiven the following schema:\n\n```\n",
"## Domain Specific Language for types\n\nFields may be tagged `typeDSL: true` in `jsonldPredicate`. If so, the field is expanded using the\nfollowing micro-DSL for schema salad types:\n\n* If the type ends with a question mark `?`, the question mark is stripped off and the type is expanded to a union with `null`\n* If the type ends with square brackets `[]` it is expanded to an array with items of the preceding type symbol\n* The type may end with both square brackets with one question mark (`[]?`) to indicate it is an optional array.\n* Identifier resolution is applied after type DSL expansion.\n\nStarting with Schema Salad version 1.3, fields tagged with `typeDSL: true` in `jsonldPredicate` have the following additional behavior:\n\n* Square brackes `[]` can be repeated to indicate 2, 3, or more dimensional array types.\n* These multi-dimensional arrays, like 1-dimensional arrays, can be combined with `?` (for example, `[][]?`) to indicate that it is an optional multi-dimensional array.\n\n### Type DSL example\n\nGiven the following schema:\n\n```\n",
"{\n \"$graph\": [\n {\"$import\": \"metaschema_base.yml\"},\n {\n \"name\": \"TypeDSLExample\",\n \"type\": \"record\",\n \"documentRoot\": true,\n \"fields\": [{\n \"name\": \"extype\",\n \"type\": \"string\",\n \"jsonldPredicate\": {\n _type: \"@vocab\",\n \"typeDSL\": true\n }\n }]\n }]\n}\n",
"```\n\nProcess the following example:\n\n```\n",
"[{\n \"extype\": \"string\"\n}, {\n \"extype\": \"string?\"\n}, {\n \"extype\": \"string[]\"\n}, {\n \"extype\": \"string[]?\"\n}]\n",
Expand Down
Loading