Skip to content

Commit

Permalink
Self schema (pydantic#131)
Browse files Browse the repository at this point in the history
* generating self schema, fix pydantic#127

* allow generating self schema with older python

* schema generating

* remove pydantic dependency from generating schema

* removing unused schema recursion checks

* use build.rs, run not eval

* fix to makefile

* fixing schema generation

* custom discriminator and fixing tests

* forbid extra

* fix build

* fix build for 3.8

* trying to fix ci...

* fix benchmarks

* coverage

* coverage
  • Loading branch information
samuelcolvin authored Jul 18, 2022
1 parent 515ff10 commit 5c33ba0
Show file tree
Hide file tree
Showing 34 changed files with 449 additions and 254 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ jobs:
with:
python-version: '3.10'

- run: pip install 'black>=22.3.0,<23' typing_extensions
- run: make rust-benchmark

build-wasm-emscripten:
Expand Down Expand Up @@ -208,7 +209,7 @@ jobs:
run: cargo update -p pydantic-core
if: "startsWith(github.ref, 'refs/tags/')"

- run: pip install 'maturin>=0.13,<0.14'
- run: pip install 'maturin>=0.13,<0.14' 'black>=22.3.0,<23' typing_extensions

- name: build wheels
run: make build-wasm
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ docs/_build/
node_modules/
package-lock.json
/pytest-speed/
/src/self_schema.py
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.DEFAULT_GOAL := all
isort = isort pydantic_core tests
black = black pydantic_core tests
isort = isort pydantic_core tests generate_self_schema.py
black = black pydantic_core tests generate_self_schema.py

.PHONY: install
install:
Expand Down Expand Up @@ -132,6 +132,7 @@ clean:
rm -f `find . -type f -name '*.py[co]' `
rm -f `find . -type f -name '*~' `
rm -f `find . -type f -name '.*~' `
rm -rf src/self_schema.py
rm -rf .cache
rm -rf flame
rm -rf htmlcov
Expand Down
1 change: 1 addition & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* remove int from bool parsing - covered by float check below
8 changes: 4 additions & 4 deletions benches/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ fn as_str(i: u8) -> String {
fn dict_json(bench: &mut Bencher) {
let gil = Python::acquire_gil();
let py = gil.python();
let validator = build_schema_validator(py, "{'type': 'dict', 'keys': 'str', 'values': 'int'}");
let validator = build_schema_validator(py, "{'type': 'dict', 'keys_schema': 'str', 'values_schema': 'int'}");

let code = format!(
"{{{}}}",
Expand All @@ -245,7 +245,7 @@ fn dict_json(bench: &mut Bencher) {
fn dict_python(bench: &mut Bencher) {
let gil = Python::acquire_gil();
let py = gil.python();
let validator = build_schema_validator(py, "{'type': 'dict', 'keys': 'str', 'values': 'int'}");
let validator = build_schema_validator(py, "{'type': 'dict', 'keys_schema': 'str', 'values_schema': 'int'}");

let code = format!(
"{{{}}}",
Expand Down Expand Up @@ -318,7 +318,7 @@ fn typed_dict_json(bench: &mut Bencher) {
py,
r#"{
'type': 'typed-dict',
'extra': 'ignore',
'extra_behavior': 'ignore',
'fields': {
'a': {'schema': 'int'},
'b': {'schema': 'int'},
Expand Down Expand Up @@ -347,7 +347,7 @@ fn typed_dict_python(bench: &mut Bencher) {
py,
r#"{
'type': 'typed-dict',
'extra': 'ignore',
'extra_behavior': 'ignore',
'fields': {
'a': {'schema': 'int'},
'b': {'schema': 'int'},
Expand Down
18 changes: 18 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
use std::process::Command;
use std::str::from_utf8;

/// Runs `generate_self_schema.py` with the `python` executable and checks that it succeeded.
///
/// # Panics
///
/// Panics if the process cannot be started, or if the script exits with a non-success status.
/// In the latter case the script's captured stdout and stderr are written to stderr first.
fn generate_self_schema() {
    let output = Command::new("python")
        .arg("generate_self_schema.py")
        .output()
        .expect("failed to execute process");

    if !output.status.success() {
        // Decode defensively: a stray non-UTF-8 byte in the script's output must not replace the
        // real failure below with an unrelated `Utf8Error` panic.
        let stdout = from_utf8(&output.stdout).unwrap_or("<stdout was not valid UTF-8>\n");
        let stderr = from_utf8(&output.stderr).unwrap_or("<stderr was not valid UTF-8>\n");
        eprint!("{}{}", stdout, stderr);
        panic!("generate_self_schema.py failed with {}", output.status);
    }
}

/// Build script entry point: emits the pyo3 cfgs, sets `has_no_coverage` when the compiler
/// reports support for the `no_coverage` feature, then generates the self schema.
fn main() {
    pyo3_build_config::use_pyo3_cfgs();

    let no_coverage_supported = matches!(version_check::supports_feature("no_coverage"), Some(true));
    if no_coverage_supported {
        println!("cargo:rustc-cfg=has_no_coverage");
    }

    generate_self_schema();
}
176 changes: 176 additions & 0 deletions generate_self_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""
This script generates the schema for the schema - i.e.
a definition of what inputs can be provided to `SchemaValidator()`.
The schema is generated from `pydantic_core/_types.py`.
"""
import importlib.util
import re
from collections.abc import Callable
from datetime import date, datetime, time, timedelta
from pathlib import Path
from typing import Any, Dict, ForwardRef, List, Type, Union

from black import Mode, TargetVersion, format_file_contents
from typing_extensions import get_args, is_typeddict

try:
    from typing import get_origin
except ImportError:
    # `typing.get_origin` could not be imported, so fall back to reading the attribute directly

    def get_origin(t):
        """Return `t.__origin__`, or `None` when `t` has no such attribute."""
        origin = getattr(t, '__origin__', None)
        return origin


# directory containing this script, and the file the generated schema is written to
THIS_DIR = Path(__file__).parent
SAVE_PATH = THIS_DIR / 'src' / 'self_schema.py'

# `_types.py` is loaded straight from its file path rather than imported from the package,
# since pydantic-core might not be installed when this script runs
_core_types_path = THIS_DIR / 'pydantic_core' / '_types.py'
core_types_spec = importlib.util.spec_from_file_location('_typing', str(_core_types_path))
core_types = importlib.util.module_from_spec(core_types_spec)
core_types_spec.loader.exec_module(core_types)

# `Schema` is used recursively, so wherever it appears inside itself the generated schema
# points back at the root schema by reference instead of being expanded again
schema_ref_validator = {
    'type': 'recursive-ref',
    'schema_ref': 'root-schema',
}


def get_schema(obj):
    """
    Convert a type annotation into the schema used to validate values of that type.

    Strings are returned unchanged, the basic date/time and scalar classes map to their class name,
    TypedDicts are expanded via `type_dict_schema`, and `Any` / `type` map to 'any'. Everything else
    is dispatched on its typing origin (union, callable, literal, list, dict, `Type[...]`).

    Raises `TypeError` for an origin that is not handled; asserts that non-basic types have an origin.
    """
    if isinstance(obj, str):
        return obj
    if obj in (datetime, timedelta, date, time, bool, int, float, str):
        return obj.__name__
    if is_typeddict(obj):
        return type_dict_schema(obj)
    if obj == Any:
        return 'any'
    if obj == type:
        # todo
        return 'any'

    origin = get_origin(obj)
    assert origin is not None, f'origin cannot be None, obj={obj}'

    if origin is Union:
        return union_schema(obj)

    if obj is Callable or origin is Callable:
        return 'callable'

    if origin is core_types.Literal:
        expected = all_literal_values(obj)
        assert expected, f'literal "expected" cannot be empty, obj={obj}'
        return {'type': 'literal', 'expected': expected}

    if issubclass(origin, List):
        items_schema = get_schema(obj.__args__[0])
        return {'type': 'list', 'items_schema': items_schema}

    if issubclass(origin, Dict):
        dict_schema = {'type': 'dict'}
        dict_schema['keys_schema'] = get_schema(obj.__args__[0])
        dict_schema['values_schema'] = get_schema(obj.__args__[1])
        return dict_schema

    if issubclass(origin, Type):
        # can't really use 'is-instance' since this is used for the class_ parameter of
        # 'is-instance' validators
        return 'any'

    raise TypeError(f'Unknown type: {obj!r}')


def _recursive_schema_for(fr_arg):
    """Return the schema for a forward reference that mentions `Schema`, only three spellings are supported."""
    if fr_arg == 'Schema':
        return schema_ref_validator
    if fr_arg == 'List[Schema]':
        return {'type': 'list', 'items_schema': schema_ref_validator}
    if fr_arg == 'Dict[str, Schema]':
        return {'type': 'dict', 'keys_schema': 'str', 'values_schema': schema_ref_validator}
    raise ValueError(f'Unknown Schema forward ref: {fr_arg}')


def type_dict_schema(typed_dict):
    """
    Build a 'typed-dict' schema (extra keys forbidden) describing the fields of `typed_dict`.

    A field is required if its name is in `__required_keys__`, unless its annotation is wrapped in
    `Required[...]` / `NotRequired[...]`, in which case the wrapper decides. Forward references that
    mention `Schema` become references to the root schema, other forward references are evaluated
    and then converted with `get_schema`.
    """
    required_keys = getattr(typed_dict, '__required_keys__', set())
    fields = {}

    for field_name, field_type in typed_dict.__annotations__.items():
        required = field_name in required_keys
        schema = None

        if type(field_type) is ForwardRef:
            fr_arg = field_type.__forward_arg__
            # strip the wrappers from the annotation text, remembering what they say about `required`
            for pattern, required_flag in ((r'NotRequired\[(.+)]', False), (r'Required\[(.+)]', True)):
                fr_arg, matched = re.subn(pattern, r'\1', fr_arg)
                if matched:
                    required = required_flag

            mentions_schema = 'Schema' == fr_arg or re.search('[^a-zA-Z]Schema', fr_arg)
            if mentions_schema:
                schema = _recursive_schema_for(fr_arg)
            else:
                field_type = eval_forward_ref(field_type)

        if schema is None:
            # evaluated (or never-deferred) annotations may still carry the wrapper as their origin
            if get_origin(field_type) == core_types.Required:
                required = True
                field_type = field_type.__args__[0]
            if get_origin(field_type) == core_types.NotRequired:
                required = False
                field_type = field_type.__args__[0]

            schema = get_schema(field_type)

        fields[field_name] = {'schema': schema, 'required': required}

    return {
        'type': 'typed-dict',
        'description': typed_dict.__name__,
        'fields': fields,
        'extra_behavior': 'forbid',
    }


def union_schema(union_type):
    """Build a 'union' schema with one choice per entry of `union_type.__args__`, in the same order."""
    choices = []
    for member in union_type.__args__:
        choices.append(get_schema(member))
    return {'type': 'union', 'choices': choices}


def all_literal_values(type_):
    """
    Return the values of a `Literal[...]` as a flat list, expanding any `Literal` nested inside it.

    Anything that is not a `Literal` is treated as a single value and returned as a one-item list.
    """
    if get_origin(type_) is not core_types.Literal:
        return [type_]

    flattened = []
    for value in get_args(type_):
        flattened.extend(all_literal_values(value))
    return flattened


def eval_forward_ref(type_):
    """
    Evaluate the forward reference `type_` using the namespace of the loaded `_types.py` module.

    `ForwardRef._evaluate` is a private API whose signature differs between Python versions, so the
    call is attempted with a recursion guard first and retried without one if that is rejected.
    """
    try:
        # `recursive_guard` is passed by keyword because its position is not stable: newer Python
        # releases put a `type_params` argument before it and make `recursive_guard` keyword-only,
        # so passing `set()` positionally fails there
        return type_._evaluate(core_types.__dict__, None, recursive_guard=set())
    except TypeError:
        # for older python (3.7 at least), where `_evaluate` takes no `recursive_guard` argument
        return type_._evaluate(core_types.__dict__, None)


def main():
    """
    Build the self schema from `Schema` in `_types.py` and write it, formatted with black, to `SAVE_PATH`.

    The result is a tagged union: the first member of `Schema` is stored under 'plain-string', every
    other member is stored under the literal value of its `type` annotation.
    """
    schema_union = core_types.Schema
    assert get_origin(schema_union) is Union, 'expected pydantic_core._types.Schema to be a union'

    members = schema_union.__args__
    choices = {'plain-string': get_schema(members[0])}
    for member in members[1:]:
        type_annotation = member.__annotations__['type']
        tag_match = re.search(r"Literal\['(.+?)']", type_annotation.__forward_arg__)
        assert tag_match, f'Unknown schema type: {type_annotation}'
        tag = tag_match.group(1)
        member_schema = get_schema(member)
        # a 'function' schema whose mode only allows 'plain' is stored under its own tag,
        # presumably so it doesn't overwrite another schema that shares the 'function' type
        if tag == 'function' and member_schema['fields']['mode']['schema']['expected'] == ['plain']:
            tag = 'function-plain'
        choices[tag] = member_schema

    schema = {
        'type': 'tagged-union',
        'ref': 'root-schema',
        'discriminator': 'self-schema-discriminator',
        'choices': choices,
    }

    raw_code = (
        f'# this file is auto-generated by generate_self_schema.py, DO NOT edit manually\nself_schema = {schema}\n'
    )
    black_mode = Mode(
        line_length=120,
        string_normalization=False,
        magic_trailing_comma=False,
        target_versions={TargetVersion.PY37, TargetVersion.PY38, TargetVersion.PY39, TargetVersion.PY310},
    )
    formatted_code = format_file_contents(raw_code, fast=False, mode=black_mode)
    SAVE_PATH.write_text(formatted_code)
    print(f'Self schema definition written to {SAVE_PATH}')


# only generate the schema when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()
31 changes: 16 additions & 15 deletions pydantic_core/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
else:
from typing import NotRequired, Required

if sys.version_info < (3, 8):
if sys.version_info < (3, 9):
from typing_extensions import Literal, TypedDict
else:
from typing import Literal, TypedDict
Expand Down Expand Up @@ -70,7 +70,7 @@ class FunctionSchema(TypedDict):
type: Literal['function']
mode: Literal['before', 'after', 'wrap']
function: Callable[..., Any]
schema: Schema
schema: NotRequired[Schema]
ref: NotRequired[str]


Expand Down Expand Up @@ -111,6 +111,7 @@ class ModelClassSchema(TypedDict):
type: Literal['model-class']
class_type: type
schema: TypedDictSchema
strict: NotRequired[bool]
ref: NotRequired[str]
config: NotRequired[Config]

Expand Down Expand Up @@ -273,30 +274,30 @@ class CallableSchema(TypedDict):


# pydantic allows types to be defined via a simple string instead of dict with just `type`, e.g.
# 'int' is equivalent to {'type': 'int'}
# 'int' is equivalent to {'type': 'int'}, this only applies to schema types which do not have other required fields
BareType = Literal[
'any',
'bool',
'none',
'str',
'bytes',
'dict',
'float',
'function',
'int',
'bool',
'float',
'dict',
'list',
'model',
'model-class',
'none',
'nullable',
'recursive-container',
'recursive-reference',
'set',
'str',
# tuple-fix-len cannot be created without more typing information
'frozenset',
'tuple-var-len',
'union',
'date',
'time',
'datetime',
'timedelta',
'callable',
]

# generate_self_schema.py is hard coded to convert this to Union[BareType, Union[...rest]], where the second union
# is tagged, so `BareType` MUST come first
Schema = Union[
BareType,
AnySchema,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["maturin>=0.13,<0.14"]
requires = ["maturin>=0.13,<0.14", "black>=22.3.0,<23", "typing_extensions"]
build-backend = "maturin"

[project]
Expand Down
5 changes: 2 additions & 3 deletions src/build_tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,11 @@ impl SchemaError {
PyErr::new::<SchemaError, A>(args)
}

pub fn from_val_error(py: Python, prefix: &str, error: ValError) -> PyErr {
pub fn from_val_error(py: Python, error: ValError) -> PyErr {
match error {
ValError::LineErrors(line_errors) => {
let join = if line_errors.len() == 1 { ":" } else { ":\n" };
let details = pretty_line_errors(py, line_errors);
SchemaError::new_err(format!("{}{}{}", prefix, join, details))
SchemaError::new_err(format!("Invalid Schema:\n{}", details))
}
ValError::InternalErr(py_err) => py_err,
}
Expand Down
5 changes: 2 additions & 3 deletions src/lookup_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,14 +241,13 @@ impl PathItem {
if let Ok(str_key) = obj.extract::<String>() {
let py_str_key = py_string!(py, &str_key);
Ok(Self::S(str_key, py_str_key))
} else if let Ok(int_key) = obj.extract::<usize>() {
} else {
let int_key = obj.extract::<usize>()?;
if index == 0 {
py_error!(PyTypeError; "The first item in an alias path must be a string")
} else {
Ok(Self::I(int_key))
}
} else {
py_error!(PyTypeError; "Alias path items must be with a string or int")
}
}

Expand Down
Loading

0 comments on commit 5c33ba0

Please sign in to comment.