From 43ab5c47f9ac05deb51747715c30ab49af8180f1 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 13 Aug 2024 09:06:01 -0700 Subject: [PATCH 1/7] enable oneOf support --- guidance/library/_json.py | 9 +++++++++ tests/unit/library/test_json.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 7d3f679f3..ca2a1bbed 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -11,6 +11,7 @@ Type, TYPE_CHECKING, ) +import warnings try: import jsonschema @@ -38,6 +39,7 @@ def _to_compact_json(target: Any) -> str: class Keyword(str, Enum): ANYOF = "anyOf" ALLOF = "allOf" + ONEOF = "oneOf" REF = "$ref" CONST = "const" ENUM = "enum" @@ -337,6 +339,13 @@ def _gen_json( raise ValueError("Only support allOf with exactly one item") return lm + _gen_json(allof_list[0], definitions) + if Keyword.ONEOF in json_schema: + oneof_list = json_schema[Keyword.ONEOF] + if len(oneof_list) == 1: + return lm + _gen_json(oneof_list[0], definitions) + warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") + return lm + _process_anyOf(anyof_list=oneof_list, definitions=definitions) + if Keyword.REF in json_schema: return lm + _get_definition(reference=json_schema[Keyword.REF], definitions=definitions) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index 4669a1071..e36b98ef2 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -1253,6 +1253,37 @@ def test_allOf_bad_schema(self): lm += gen_json(name=CAPTURE_KEY, schema=schema_obj) assert ve.value.args[0] == "Only support allOf with exactly one item" +class TestOneOf: + @pytest.mark.parametrize("target_obj", [123, 42]) + def test_oneOf_simple(self, target_obj): + schema = """{ + "oneOf" : [{ "type": "integer" }] + } + """ + # First sanity check what we're setting up + schema_obj = json.loads(schema) + validate(instance=target_obj, schema=schema_obj) + + # The actual check + generate_and_check(target_obj, schema_obj) + + + @pytest.mark.parametrize("target_obj", [123, True]) + def test_oneOf_compound(self, target_obj): + schema = """{ + "oneOf" : [{ "type": "integer" }, { "type": "boolean" }] + } + """ + # First sanity check what we're setting up + schema_obj = json.loads(schema) + validate(instance=target_obj, schema=schema_obj) + + # The actual check; we expect a warning here because oneOf is not fully supported + with pytest.warns() as record: + generate_and_check(target_obj, schema_obj) + assert len(record) == 1 + assert record[0].message.args[0].startswith("oneOf not fully supported") + class TestEnum: simple_schema = """{ From fd19c4317224d49d8d1657ad39ba99c0f7ad4c92 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 14 Aug 2024 09:14:28 -0700 Subject: [PATCH 2/7] add new-style 'id' field to ignored keys --- guidance/library/_json.py | 1 + 1 file changed, 1 insertion(+) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index ca2a1bbed..e20cfdaad 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -56,6 +56,7 @@ class Keyword(str, Enum): IGNORED_KEYS = { "$schema", "$id", + "id", "$comment", "title", "description", From b318983287721253e606caefc3739577292cdfc6 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 14 Aug 2024 09:17:06 -0700 Subject: [PATCH 3/7] add discriminator to ignored_keys --- guidance/library/_json.py | 1 + 1 file changed, 1 insertion(+) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index e20cfdaad..84d1afe41 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -63,6 +63,7 @@ class Keyword(str, Enum): "default", "examples", "required", # TODO: implement and remove from ignored list + "discriminator", # TODO: alternatively we could implement this in a stateful way } TYPE_SPECIFIC_KEYS = { From ed264f64c0bc28318e376e07471c151c7a78aec5 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 14 Aug 2024 11:41:52 -0700 Subject: [PATCH 4/7] Add test for discriminated union --- tests/unit/library/test_pydantic.py | 41 +++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/unit/library/test_pydantic.py b/tests/unit/library/test_pydantic.py index e15f623d2..4b78ffa9d 100644 --- a/tests/unit/library/test_pydantic.py +++ b/tests/unit/library/test_pydantic.py @@ -271,3 +271,44 @@ def test_bad_generic( allowed_bytes=allowed_bytes, pydantic_model=model, ) + +class TestDiscriminatedUnion: + """ + https://docs.pydantic.dev/latest/concepts/unions/#discriminated-unions-with-str-discriminators + """ + + class Cat(pydantic.BaseModel): + pet_type: Literal['cat'] + meows: int + + + class Dog(pydantic.BaseModel): + pet_type: Literal['dog'] + barks: float + + + class Lizard(pydantic.BaseModel): + pet_type: Literal['reptile', 'lizard'] + scales: bool + + + class Model(pydantic.BaseModel): + pet: Union[ + 'TestDiscriminatedUnion.Cat', + 'TestDiscriminatedUnion.Dog', + 'TestDiscriminatedUnion.Lizard', + ] = pydantic.Field(..., discriminator='pet_type') + n: int + + def test_good(self): + obj = {"pet": {"pet_type": "dog", "barks": 3.14}, "n": 42} + generate_and_check(obj, self.Model) + + def test_bad(self): + check_match_failure( + bad_obj={"pet": {"pet_type": "dog"}, "n": 42}, + good_bytes=b'{"pet":{"pet_type":"dog"', + failure_byte=b"}", + allowed_bytes={Byte(b",")}, # expect a comma to continue the object with "barks" + pydantic_model=self.Model, + ) From fe6ebd68748bcdf41ed07fe589b2baa0b8382507 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 20 Aug 2024 11:18:33 +0100 Subject: [PATCH 5/7] fix TestDiscriminatedUnion check_match_failure --- tests/unit/library/test_pydantic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/library/test_pydantic.py b/tests/unit/library/test_pydantic.py index 384840d91..e801e7f35 100644 --- a/tests/unit/library/test_pydantic.py +++ b/tests/unit/library/test_pydantic.py @@ -308,10 +308,12 @@ def test_good(self): generate_and_check(obj, self.Model) def test_bad(self): - check_match_failure( + check_match_failure( bad_obj={"pet": {"pet_type": "dog"}, "n": 42}, good_bytes=b'{"pet":{"pet_type":"dog"', failure_byte=b"}", - allowed_bytes={Byte(b",")}, # expect a comma to continue the object with "barks" + allowed_bytes={b","}, # expect a comma to continue the object with "barks" pydantic_model=self.Model, + maybe_whitespace=False, + compact=True ) From ed602184d5bdec222bbc6e8bc2c5318d36be4a76 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 22 Aug 2024 14:56:29 +0100 Subject: [PATCH 6/7] add test to ensure that IGNORED_KEYS doesn't accidentally blacklist property names --- tests/unit/library/test_json.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index 53302736d..c56144ada 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -8,7 +8,7 @@ from guidance import json as gen_json from guidance import models -from guidance.library._json import _to_compact_json, WHITESPACE +from guidance.library._json import _to_compact_json, WHITESPACE, IGNORED_KEYS from ...utils import check_match_failure as _check_match_failure from ...utils import check_run_with_temperature @@ -1981,3 +1981,13 @@ def test_no_additionalProperties(self, compact): maybe_whitespace=True, compact=compact, ) + +def test_ignored_keys_allowed_as_properties(): + schema_obj = { + "type": "object", + "properties": { + key: {"type": "string"} for key in IGNORED_KEYS + } + } + target_obj = {key: "value" for key in IGNORED_KEYS} + generate_and_check(target_obj, schema_obj) From 026a8857a0da14bc8e85af9160139129ea565c11 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 22 Aug 2024 15:04:24 +0100 Subject: [PATCH 7/7] comment on discriminator --- guidance/library/_json.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 01a973ebf..d5f156233 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -64,9 +64,16 @@ class Keyword(str, Enum): "default", "examples", "required", # TODO: implement and remove from ignored list - "discriminator", # TODO: alternatively we could implement this in a stateful way } +# discriminator is part of OpenAPI 3.1, not JSON Schema itself +# https://json-schema.org/blog/posts/validating-openapi-and-json-schema +# TODO: While ignoring this key shouldn't lead to invalid outputs, forcing +# the model to choose the value of the marked field before other fields +# are generated (statefully or statelessly) would reduce grammar ambiguity +# and possibly improve quality. +IGNORED_KEYS.add("discriminator") + TYPE_SPECIFIC_KEYS = { "array": {"items", "prefixItems", "minItems", "maxItems"}, "object": {"properties", "additionalProperties"},