From 98d9fb866c4d3b59ca88493472aca291170a0f5e Mon Sep 17 00:00:00 2001 From: Dan Redding <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 31 Aug 2024 14:28:09 +0100 Subject: [PATCH] refactor: Reduce `SchemaValidationError` traceback length (#3530) --- altair/utils/schemapi.py | 213 +++++++++++++++++++++---------------- altair/vegalite/v5/api.py | 165 ++++++++++++++-------------- tools/schemapi/schemapi.py | 213 +++++++++++++++++++++---------------- 3 files changed, 324 insertions(+), 267 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index bc0b40581..84f5be277 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -42,6 +42,7 @@ from altair import vegalite if TYPE_CHECKING: + from types import ModuleType from typing import ClassVar from referencing import Registry @@ -57,6 +58,7 @@ from typing import Never, Self else: from typing_extensions import Never, Self + _OptionalModule: TypeAlias = "ModuleType | None" ValidationErrorList: TypeAlias = List[jsonschema.exceptions.ValidationError] GroupedValidationErrors: TypeAlias = Dict[str, ValidationErrorList] @@ -559,9 +561,25 @@ def _resolve_references( class SchemaValidationError(jsonschema.ValidationError): - """A wrapper for jsonschema.ValidationError with friendlier traceback.""" - def __init__(self, obj: SchemaBase, err: jsonschema.ValidationError) -> None: + """ + A wrapper for ``jsonschema.ValidationError`` with friendlier traceback. + + Parameters + ---------- + obj + The instance that failed ``self.validate(...)``. + err + The original ``ValidationError``. + + Notes + ----- + We do not raise `from err` as else the resulting traceback is very long + as it contains part of the Vega-Lite schema. + + It would also first show the less helpful `ValidationError` instead of + the more user friendly `SchemaValidationError`. 
+ """ super().__init__(**err._contents()) self.obj = obj self._errors: GroupedValidationErrors = getattr( @@ -991,88 +1009,45 @@ def to_dict( Parameters ---------- validate : bool, optional - If True (default), then validate the output dictionary - against the schema. + If True (default), then validate the result against the schema. ignore : list[str], optional - A list of keys to ignore. It is usually not needed - to specify this argument as a user. + A list of keys to ignore. context : dict[str, Any], optional - A context dictionary. It is usually not needed - to specify this argument as a user. - - Notes - ----- - Technical: The ignore parameter will *not* be passed to child to_dict - function calls. - - Returns - ------- - dict - The dictionary representation of this object + A context dictionary. Raises ------ SchemaValidationError : - if validate=True and the dict does not conform to the schema + If ``validate`` and the result does not conform to the schema. + + Notes + ----- + - ``ignore``, ``context`` are usually not needed to be specified as a user. + - *Technical*: ``ignore`` will **not** be passed to child :meth:`.to_dict()`. """ - if context is None: - context = {} - if ignore is None: - ignore = [] - # The following return the package only if it has already been - # imported - otherwise they return None. This is useful for - # isinstance checks - for example, if pandas has not been imported, - # then an object is definitely not a `pandas.Timestamp`. - pd_opt = sys.modules.get("pandas") - np_opt = sys.modules.get("numpy") + context = context or {} + ignore = ignore or [] + opts = _get_optional_modules(np_opt="numpy", pd_opt="pandas") if self._args and not self._kwds: - result = _todict( - self._args[0], context=context, np_opt=np_opt, pd_opt=pd_opt - ) + kwds = self._args[0] elif not self._args: kwds = self._kwds.copy() - # parsed_shorthand is added by FieldChannelMixin. 
- # It's used below to replace shorthand with its long form equivalent - # parsed_shorthand is removed from context if it exists so that it is - # not passed to child to_dict function calls - parsed_shorthand = context.pop("parsed_shorthand", {}) - # Prevent that pandas categorical data is automatically sorted - # when a non-ordinal data type is specifed manually - # or if the encoding channel does not support sorting - if "sort" in parsed_shorthand and ( - "sort" not in kwds or kwds["type"] not in {"ordinal", Undefined} - ): - parsed_shorthand.pop("sort") - - kwds.update( - { - k: v - for k, v in parsed_shorthand.items() - if kwds.get(k, Undefined) is Undefined - } - ) - kwds = { - k: v for k, v in kwds.items() if k not in {*list(ignore), "shorthand"} - } - if "mark" in kwds and isinstance(kwds["mark"], str): - kwds["mark"] = {"type": kwds["mark"]} - result = _todict(kwds, context=context, np_opt=np_opt, pd_opt=pd_opt) + exclude = {*ignore, "shorthand"} + if parsed := context.pop("parsed_shorthand", None): + kwds = _replace_parsed_shorthand(parsed, kwds) + kwds = {k: v for k, v in kwds.items() if k not in exclude} + if (mark := kwds.get("mark")) and isinstance(mark, str): + kwds["mark"] = {"type": mark} else: - msg = ( - f"{self.__class__} instance has both a value and properties : " - "cannot serialize to dict" - ) + msg = f"{type(self)} instance has both a value and properties : cannot serialize to dict" raise ValueError(msg) + result = _todict(kwds, context=context, **opts) if validate: + # NOTE: Don't raise `from err`, see `SchemaValidationError` doc try: self.validate(result) except jsonschema.ValidationError as err: - # We do not raise `from err` as else the resulting - # traceback is very long as it contains part - # of the Vega-Lite schema. 
It would also first - # show the less helpful ValidationError instead of - # the more user friendly SchemaValidationError raise SchemaValidationError(self, err) from None return result @@ -1092,30 +1067,27 @@ def to_json( Parameters ---------- validate : bool, optional - If True (default), then validate the output dictionary - against the schema. + If True (default), then validate the result against the schema. indent : int, optional The number of spaces of indentation to use. The default is 2. sort_keys : bool, optional If True (default), sort keys in the output. ignore : list[str], optional - A list of keys to ignore. It is usually not needed - to specify this argument as a user. + A list of keys to ignore. context : dict[str, Any], optional - A context dictionary. It is usually not needed - to specify this argument as a user. + A context dictionary. **kwargs Additional keyword arguments are passed to ``json.dumps()`` + Raises + ------ + SchemaValidationError : + If ``validate`` and the result does not conform to the schema. + Notes ----- - Technical: The ignore parameter will *not* be passed to child to_dict - function calls. - - Returns - ------- - str - The JSON specification of the chart object. + - ``ignore``, ``context`` are usually not needed to be specified as a user. + - *Technical*: ``ignore`` will **not** be passed to child :meth:`.to_dict()`. """ if ignore is None: ignore = [] @@ -1143,15 +1115,10 @@ def from_dict( validate : boolean If True (default), then validate the input against the schema. 
- Returns - ------- - obj : Schema object - The wrapped schema - Raises ------ jsonschema.ValidationError : - if validate=True and dct does not conform to the schema + If ``validate`` and ``dct`` does not conform to the schema """ if validate: cls.validate(dct) @@ -1214,13 +1181,8 @@ def validate_property( cls, name: str, value: Any, schema: dict[str, Any] | None = None ) -> None: """Validate a property against property schema in the context of the rootschema.""" - # The following return the package only if it has already been - # imported - otherwise they return None. This is useful for - # isinstance checks - for example, if pandas has not been imported, - # then an object is definitely not a `pandas.Timestamp`. - pd_opt = sys.modules.get("pandas") - np_opt = sys.modules.get("numpy") - value = _todict(value, context={}, np_opt=np_opt, pd_opt=pd_opt) + opts = _get_optional_modules(np_opt="numpy", pd_opt="pandas") + value = _todict(value, context={}, **opts) props = cls.resolve_references(schema or cls._schema).get("properties", {}) validate_jsonschema( value, props.get(name, {}), rootschema=cls._rootschema or cls._schema @@ -1230,6 +1192,71 @@ def __dir__(self) -> list[str]: return sorted(chain(super().__dir__(), self._kwds)) +def _get_optional_modules(**modules: str) -> dict[str, _OptionalModule]: + """ + Returns packages only if they have already been imported - otherwise they return `None`. + + This is useful for `isinstance` checks. + + For example, if `pandas` has not been imported, then an object is + definitely not a `pandas.Timestamp`. + + Parameters + ---------- + **modules + Keyword-only binding from `{alias: module_name}`. 
+ + Examples + -------- + >>> import pandas as pd # doctest: +SKIP + >>> import polars as pl # doctest: +SKIP + >>> from altair.utils.schemapi import _get_optional_modules # doctest: +SKIP + >>> + >>> _get_optional_modules(pd="pandas", pl="polars", ibis="ibis") # doctest: +SKIP + { + "pd": <module 'pandas' from '...'>, + "pl": <module 'polars' from '...'>, + "ibis": None, + } + + If the user later imports ``ibis``, it would appear in subsequent calls. + + >>> import ibis # doctest: +SKIP + >>> + >>> _get_optional_modules(ibis="ibis") # doctest: +SKIP + { + "ibis": <module 'ibis' from '...'>, + } + """ + return {k: sys.modules.get(v) for k, v in modules.items()} + + +def _replace_parsed_shorthand( + parsed_shorthand: dict[str, Any], kwds: dict[str, Any] +) -> dict[str, Any]: + """ + `parsed_shorthand` is added by `FieldChannelMixin`. + + It's used below to replace shorthand with its long form equivalent + `parsed_shorthand` is removed from `context` if it exists so that it is + not passed to child `to_dict` function calls. + """ + # Prevent that pandas categorical data is automatically sorted + # when a non-ordinal data type is specifed manually + # or if the encoding channel does not support sorting + if "sort" in parsed_shorthand and ( + "sort" not in kwds or kwds["type"] not in {"ordinal", Undefined} + ): + parsed_shorthand.pop("sort") + + kwds.update( + (k, v) + for k, v in parsed_shorthand.items() + if kwds.get(k, Undefined) is Undefined + ) + return kwds + + TSchemaBase = TypeVar("TSchemaBase", bound=SchemaBase) _CopyImpl = TypeVar("_CopyImpl", SchemaBase, Dict[Any, Any], List[Any]) diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index 4e8fde039..9ef5659ba 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -27,7 +27,7 @@ from altair import utils from altair.expr import core as _expr_core -from altair.utils import Optional, Undefined +from altair.utils import Optional, SchemaBase, Undefined from altair.utils._vegafusion_data import ( compile_with_vegafusion as _compile_with_vegafusion, ) @@ -125,7 
+125,6 @@ ProjectionType, RepeatMapping, RepeatRef, - SchemaBase, SelectionParameter, SequenceGenerator, SortField, @@ -194,7 +193,7 @@ ] ChartDataType: TypeAlias = Optional[Union[DataType, core.Data, str, core.Generator]] -_TSchemaBase = TypeVar("_TSchemaBase", bound=core.SchemaBase) +_TSchemaBase = TypeVar("_TSchemaBase", bound=SchemaBase) # ------------------------------------------------------------------------ @@ -509,7 +508,7 @@ def check_fields_and_encodings(parameter: Parameter, field_name: str) -> bool: ] """Permitted types for `&` reduced predicates.""" -_StatementType: TypeAlias = Union[core.SchemaBase, Map, str] +_StatementType: TypeAlias = Union[SchemaBase, Map, str] """Permitted types for `if_true`/`if_false`. In python terms: @@ -532,7 +531,7 @@ def check_fields_and_encodings(parameter: Parameter, field_name: str) -> bool: _LiteralValue: TypeAlias = Union[str, bool, float, int] """Primitive python value types.""" -_FieldEqualType: TypeAlias = Union[_LiteralValue, Map, Parameter, core.SchemaBase] +_FieldEqualType: TypeAlias = Union[_LiteralValue, Map, Parameter, SchemaBase] """Permitted types for equality checks on field values: - `datum.field == ...` @@ -586,7 +585,7 @@ def _condition_to_selection( **kwargs: Any, ) -> SchemaBase | dict[str, _ConditionType | Any]: selection: SchemaBase | dict[str, _ConditionType | Any] - if isinstance(if_true, core.SchemaBase): + if isinstance(if_true, SchemaBase): if_true = if_true.to_dict() elif isinstance(if_true, str): if isinstance(if_false, str): @@ -600,7 +599,7 @@ def _condition_to_selection( if_true = utils.parse_shorthand(if_true) if_true.update(kwargs) condition.update(if_true) - if isinstance(if_false, core.SchemaBase): + if isinstance(if_false, SchemaBase): # For the selection, the channel definitions all allow selections # already. So use this SchemaBase wrapper if possible. 
selection = if_false.copy() @@ -662,8 +661,8 @@ def _reveal_parsed_shorthand(obj: Map, /) -> dict[str, Any]: def _is_extra(*objs: Any, kwds: Map) -> Iterator[bool]: for el in objs: - if isinstance(el, (core.SchemaBase, t.Mapping)): - item = el.to_dict(validate=False) if isinstance(el, core.SchemaBase) else el + if isinstance(el, (SchemaBase, t.Mapping)): + item = el.to_dict(validate=False) if isinstance(el, SchemaBase) else el yield not (item.keys() - kwds.keys()).isdisjoint(utils.SHORTHAND_KEYS) else: continue @@ -774,7 +773,7 @@ def _parse_literal(val: Any, /) -> dict[str, Any]: def _parse_then(statement: _StatementType, kwds: dict[str, Any], /) -> dict[str, Any]: - if isinstance(statement, core.SchemaBase): + if isinstance(statement, SchemaBase): statement = statement.to_dict() elif not isinstance(statement, dict): statement = _parse_literal(statement) @@ -786,7 +785,7 @@ def _parse_otherwise( statement: _StatementType, conditions: _Conditional[Any], kwds: dict[str, Any], / ) -> SchemaBase | _Conditional[Any]: selection: SchemaBase | _Conditional[Any] - if isinstance(statement, core.SchemaBase): + if isinstance(statement, SchemaBase): selection = statement.copy() conditions.update(**kwds) # type: ignore[call-arg] selection.condition = conditions["condition"] @@ -879,7 +878,7 @@ def then(self, statement: _StatementType, /, **kwds: Any) -> Then[Any]: return Then(_Conditional(condition=[condition])) -class Then(core.SchemaBase, t.Generic[_C]): +class Then(SchemaBase, t.Generic[_C]): """ Utility class for ``when-then-otherwise`` conditions. @@ -1716,12 +1715,29 @@ def _top_schema_base( # noqa: ANN202 """ Enforces an intersection type w/ `SchemaBase` & `TopLevelMixin` objects. - Use for instance methods. + Use for methods, called from `TopLevelMixin` that are defined in `SchemaBase`. + + Notes + ----- + - The `super` sub-branch is not statically checked *here*. 
+ - It would widen the inferred intersection to: + - `(<subclass of SchemaBase and TopLevelMixin> | super)` + - Both dunder attributes are not in the `super` type stubs + - Requiring 2x *# type: ignore[attr-defined]* + - However it is required at runtime for any cases that use `super(..., copy)`. + - The inferred type **is** used statically **outside** of this function. """ - if isinstance(obj, core.SchemaBase) and isinstance(obj, TopLevelMixin): + if (isinstance(obj, SchemaBase) and isinstance(obj, TopLevelMixin)) or ( + not TYPE_CHECKING + and ( + isinstance(obj, super) + and issubclass(obj.__self_class__, SchemaBase) + and obj.__thisclass__ is TopLevelMixin + ) + ): return obj else: - msg = f"{type(obj).__name__!r} does not derive from {type(core.SchemaBase).__name__!r}" + msg = f"{type(obj).__name__!r} does not derive from {SchemaBase.__name__!r}" raise TypeError(msg) @@ -1735,7 +1751,7 @@ def to_dict( # noqa: C901 self, validate: bool = True, *, - format: str = "vega-lite", + format: Literal["vega-lite", "vega"] = "vega-lite", ignore: list[str] | None = None, context: dict[str, Any] | None = None, ) -> dict[str, Any]: @@ -1745,31 +1761,25 @@ def to_dict( # noqa: C901 Parameters ---------- validate : bool, optional - If True (default), then validate the output dictionary - against the schema. - format : str, optional - Chart specification format, one of "vega-lite" (default) or "vega" + If True (default), then validate the result against the schema. + format : {"vega-lite", "vega"}, optional + The chart specification format. + The `"vega"` format relies on the active Vega-Lite compiler plugin, which + by default requires the vl-convert-python package. ignore : list[str], optional - A list of keys to ignore. It is usually not needed - to specify this argument as a user. + A list of keys to ignore. context : dict[str, Any], optional - A context dictionary. It is usually not needed - to specify this argument as a user. 
- - Notes - ----- - Technical: The ignore parameter will *not* be passed to child to_dict - function calls. - - Returns - ------- - dict - The dictionary representation of this chart + A context dictionary. Raises ------ - SchemaValidationError - if validate=True and the dict does not conform to the schema + SchemaValidationError : + If ``validate`` and the result does not conform to the schema. + + Notes + ----- + - ``ignore``, ``context`` are usually not needed to be specified as a user. + - *Technical*: ``ignore`` will **not** be passed to child :meth:`.to_dict()`. """ # Validate format if format not in {"vega-lite", "vega"}: @@ -1807,10 +1817,7 @@ def to_dict( # noqa: C901 # remaining to_dict calls are not at top level context["top_level"] = False - # TopLevelMixin instance does not necessarily have to_dict defined - # but due to how Altair is set up this should hold. - # Too complex to type hint right now - vegalite_spec: Any = super(TopLevelMixin, copy).to_dict( # type: ignore[misc] + vegalite_spec: Any = _top_schema_base(super(TopLevelMixin, copy)).to_dict( validate=validate, ignore=ignore, context=dict(context, pre_transform=False) ) @@ -1862,7 +1869,7 @@ def to_json( indent: int | str | None = 2, sort_keys: bool = True, *, - format: str = "vega-lite", + format: Literal["vega-lite", "vega"] = "vega-lite", ignore: list[str] | None = None, context: dict[str, Any] | None = None, **kwargs: Any, @@ -1873,24 +1880,31 @@ def to_json( Parameters ---------- validate : bool, optional - If True (default), then validate the output dictionary - against the schema. + If True (default), then validate the result against the schema. indent : int, optional The number of spaces of indentation to use. The default is 2. sort_keys : bool, optional If True (default), sort keys in the output. - format : str, optional - The chart specification format. One of "vega-lite" (default) or "vega". 
- The "vega" format relies on the active Vega-Lite compiler plugin, which + format : {"vega-lite", "vega"}, optional + The chart specification format. + The `"vega"` format relies on the active Vega-Lite compiler plugin, which by default requires the vl-convert-python package. ignore : list[str], optional - A list of keys to ignore. It is usually not needed - to specify this argument as a user. + A list of keys to ignore. context : dict[str, Any], optional - A context dictionary. It is usually not needed - to specify this argument as a user. + A context dictionary. **kwargs Additional keyword arguments are passed to ``json.dumps()`` + + Raises + ------ + SchemaValidationError : + If ``validate`` and the result does not conform to the schema. + + Notes + ----- + - ``ignore``, ``context`` are usually not needed to be specified as a user. + - *Technical*: ``ignore`` will **not** be passed to child :meth:`.to_dict()`. """ if ignore is None: ignore = [] @@ -3697,24 +3711,19 @@ def from_dict( cls: type[_TSchemaBase], dct: dict[str, Any], validate: bool = True ) -> _TSchemaBase: """ - Construct class from a dictionary representation. + Construct a ``Chart`` from a dictionary representation. Parameters ---------- dct : dictionary - The dict from which to construct the class + The dict from which to construct the ``Chart``. validate : boolean If True (default), then validate the input against the schema. 
- Returns - ------- - obj : Chart object - The wrapped schema - Raises ------ jsonschema.ValidationError : - if validate=True and dct does not conform to the schema + If ``validate`` and ``dct`` does not conform to the schema """ _tp: Any for tp in TopLevelMixin.__subclasses__(): @@ -3731,41 +3740,35 @@ def to_dict( self, validate: bool = True, *, - format: str = "vega-lite", + format: Literal["vega-lite", "vega"] = "vega-lite", ignore: list[str] | None = None, context: dict[str, Any] | None = None, ) -> dict[str, Any]: """ - Convert the chart to a dictionary suitable for JSON export. + Convert the ``Chart`` to a dictionary suitable for JSON export. Parameters ---------- validate : bool, optional - If True (default), then validate the output dictionary - against the schema. - format : str, optional - Chart specification format, one of "vega-lite" (default) or "vega" + If True (default), then validate the result against the schema. + format : {"vega-lite", "vega"}, optional + The chart specification format. + The `"vega"` format relies on the active Vega-Lite compiler plugin, which + by default requires the vl-convert-python package. ignore : list[str], optional - A list of keys to ignore. It is usually not needed - to specify this argument as a user. + A list of keys to ignore. context : dict[str, Any], optional - A context dictionary. It is usually not needed - to specify this argument as a user. - - Notes - ----- - Technical: The ignore parameter will *not* be passed to child to_dict - function calls. - - Returns - ------- - dict - The dictionary representation of this chart + A context dictionary. Raises ------ - SchemaValidationError - if validate=True and the dict does not conform to the schema + SchemaValidationError : + If ``validate`` and the result does not conform to the schema. + + Notes + ----- + - ``ignore``, ``context`` are usually not needed to be specified as a user. + - *Technical*: ``ignore`` will **not** be passed to child :meth:`.to_dict()`. 
""" context = context or {} kwds: Map = {"validate": validate, "format": format, "ignore": ignore, "context": context} # fmt: skip @@ -3861,7 +3864,7 @@ def _check_if_valid_subspec( ], ) -> None: """Raise a `TypeError` if `spec` is not a valid sub-spec.""" - if not isinstance(spec, core.SchemaBase): + if not isinstance(spec, SchemaBase): msg = f"Only chart objects can be used in {classname}." raise TypeError(msg) for attr in TOPLEVEL_ONLY_KEYS: diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 9d21ab793..5140073ad 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -40,6 +40,7 @@ from altair import vegalite if TYPE_CHECKING: + from types import ModuleType from typing import ClassVar from referencing import Registry @@ -55,6 +56,7 @@ from typing import Never, Self else: from typing_extensions import Never, Self + _OptionalModule: TypeAlias = "ModuleType | None" ValidationErrorList: TypeAlias = List[jsonschema.exceptions.ValidationError] GroupedValidationErrors: TypeAlias = Dict[str, ValidationErrorList] @@ -557,9 +559,25 @@ def _resolve_references( class SchemaValidationError(jsonschema.ValidationError): - """A wrapper for jsonschema.ValidationError with friendlier traceback.""" - def __init__(self, obj: SchemaBase, err: jsonschema.ValidationError) -> None: + """ + A wrapper for ``jsonschema.ValidationError`` with friendlier traceback. + + Parameters + ---------- + obj + The instance that failed ``self.validate(...)``. + err + The original ``ValidationError``. + + Notes + ----- + We do not raise `from err` as else the resulting traceback is very long + as it contains part of the Vega-Lite schema. + + It would also first show the less helpful `ValidationError` instead of + the more user friendly `SchemaValidationError`. 
+ """ super().__init__(**err._contents()) self.obj = obj self._errors: GroupedValidationErrors = getattr( @@ -989,88 +1007,45 @@ def to_dict( Parameters ---------- validate : bool, optional - If True (default), then validate the output dictionary - against the schema. + If True (default), then validate the result against the schema. ignore : list[str], optional - A list of keys to ignore. It is usually not needed - to specify this argument as a user. + A list of keys to ignore. context : dict[str, Any], optional - A context dictionary. It is usually not needed - to specify this argument as a user. - - Notes - ----- - Technical: The ignore parameter will *not* be passed to child to_dict - function calls. - - Returns - ------- - dict - The dictionary representation of this object + A context dictionary. Raises ------ SchemaValidationError : - if validate=True and the dict does not conform to the schema + If ``validate`` and the result does not conform to the schema. + + Notes + ----- + - ``ignore``, ``context`` are usually not needed to be specified as a user. + - *Technical*: ``ignore`` will **not** be passed to child :meth:`.to_dict()`. """ - if context is None: - context = {} - if ignore is None: - ignore = [] - # The following return the package only if it has already been - # imported - otherwise they return None. This is useful for - # isinstance checks - for example, if pandas has not been imported, - # then an object is definitely not a `pandas.Timestamp`. - pd_opt = sys.modules.get("pandas") - np_opt = sys.modules.get("numpy") + context = context or {} + ignore = ignore or [] + opts = _get_optional_modules(np_opt="numpy", pd_opt="pandas") if self._args and not self._kwds: - result = _todict( - self._args[0], context=context, np_opt=np_opt, pd_opt=pd_opt - ) + kwds = self._args[0] elif not self._args: kwds = self._kwds.copy() - # parsed_shorthand is added by FieldChannelMixin. 
- # It's used below to replace shorthand with its long form equivalent - # parsed_shorthand is removed from context if it exists so that it is - # not passed to child to_dict function calls - parsed_shorthand = context.pop("parsed_shorthand", {}) - # Prevent that pandas categorical data is automatically sorted - # when a non-ordinal data type is specifed manually - # or if the encoding channel does not support sorting - if "sort" in parsed_shorthand and ( - "sort" not in kwds or kwds["type"] not in {"ordinal", Undefined} - ): - parsed_shorthand.pop("sort") - - kwds.update( - { - k: v - for k, v in parsed_shorthand.items() - if kwds.get(k, Undefined) is Undefined - } - ) - kwds = { - k: v for k, v in kwds.items() if k not in {*list(ignore), "shorthand"} - } - if "mark" in kwds and isinstance(kwds["mark"], str): - kwds["mark"] = {"type": kwds["mark"]} - result = _todict(kwds, context=context, np_opt=np_opt, pd_opt=pd_opt) + exclude = {*ignore, "shorthand"} + if parsed := context.pop("parsed_shorthand", None): + kwds = _replace_parsed_shorthand(parsed, kwds) + kwds = {k: v for k, v in kwds.items() if k not in exclude} + if (mark := kwds.get("mark")) and isinstance(mark, str): + kwds["mark"] = {"type": mark} else: - msg = ( - f"{self.__class__} instance has both a value and properties : " - "cannot serialize to dict" - ) + msg = f"{type(self)} instance has both a value and properties : cannot serialize to dict" raise ValueError(msg) + result = _todict(kwds, context=context, **opts) if validate: + # NOTE: Don't raise `from err`, see `SchemaValidationError` doc try: self.validate(result) except jsonschema.ValidationError as err: - # We do not raise `from err` as else the resulting - # traceback is very long as it contains part - # of the Vega-Lite schema. 
It would also first - # show the less helpful ValidationError instead of - # the more user friendly SchemaValidationError raise SchemaValidationError(self, err) from None return result @@ -1090,30 +1065,27 @@ def to_json( Parameters ---------- validate : bool, optional - If True (default), then validate the output dictionary - against the schema. + If True (default), then validate the result against the schema. indent : int, optional The number of spaces of indentation to use. The default is 2. sort_keys : bool, optional If True (default), sort keys in the output. ignore : list[str], optional - A list of keys to ignore. It is usually not needed - to specify this argument as a user. + A list of keys to ignore. context : dict[str, Any], optional - A context dictionary. It is usually not needed - to specify this argument as a user. + A context dictionary. **kwargs Additional keyword arguments are passed to ``json.dumps()`` + Raises + ------ + SchemaValidationError : + If ``validate`` and the result does not conform to the schema. + Notes ----- - Technical: The ignore parameter will *not* be passed to child to_dict - function calls. - - Returns - ------- - str - The JSON specification of the chart object. + - ``ignore``, ``context`` are usually not needed to be specified as a user. + - *Technical*: ``ignore`` will **not** be passed to child :meth:`.to_dict()`. """ if ignore is None: ignore = [] @@ -1141,15 +1113,10 @@ def from_dict( validate : boolean If True (default), then validate the input against the schema. 
- Returns - ------- - obj : Schema object - The wrapped schema - Raises ------ jsonschema.ValidationError : - if validate=True and dct does not conform to the schema + If ``validate`` and ``dct`` does not conform to the schema """ if validate: cls.validate(dct) @@ -1212,13 +1179,8 @@ def validate_property( cls, name: str, value: Any, schema: dict[str, Any] | None = None ) -> None: """Validate a property against property schema in the context of the rootschema.""" - # The following return the package only if it has already been - # imported - otherwise they return None. This is useful for - # isinstance checks - for example, if pandas has not been imported, - # then an object is definitely not a `pandas.Timestamp`. - pd_opt = sys.modules.get("pandas") - np_opt = sys.modules.get("numpy") - value = _todict(value, context={}, np_opt=np_opt, pd_opt=pd_opt) + opts = _get_optional_modules(np_opt="numpy", pd_opt="pandas") + value = _todict(value, context={}, **opts) props = cls.resolve_references(schema or cls._schema).get("properties", {}) validate_jsonschema( value, props.get(name, {}), rootschema=cls._rootschema or cls._schema @@ -1228,6 +1190,71 @@ def __dir__(self) -> list[str]: return sorted(chain(super().__dir__(), self._kwds)) +def _get_optional_modules(**modules: str) -> dict[str, _OptionalModule]: + """ + Returns packages only if they have already been imported - otherwise they return `None`. + + This is useful for `isinstance` checks. + + For example, if `pandas` has not been imported, then an object is + definitely not a `pandas.Timestamp`. + + Parameters + ---------- + **modules + Keyword-only binding from `{alias: module_name}`. 
+ + Examples + -------- + >>> import pandas as pd # doctest: +SKIP + >>> import polars as pl # doctest: +SKIP + >>> from altair.utils.schemapi import _get_optional_modules # doctest: +SKIP + >>> + >>> _get_optional_modules(pd="pandas", pl="polars", ibis="ibis") # doctest: +SKIP + { + "pd": <module 'pandas' from '...'>, + "pl": <module 'polars' from '...'>, + "ibis": None, + } + + If the user later imports ``ibis``, it would appear in subsequent calls. + + >>> import ibis # doctest: +SKIP + >>> + >>> _get_optional_modules(ibis="ibis") # doctest: +SKIP + { + "ibis": <module 'ibis' from '...'>, + } + """ + return {k: sys.modules.get(v) for k, v in modules.items()} + + +def _replace_parsed_shorthand( + parsed_shorthand: dict[str, Any], kwds: dict[str, Any] +) -> dict[str, Any]: + """ + `parsed_shorthand` is added by `FieldChannelMixin`. + + It's used below to replace shorthand with its long form equivalent + `parsed_shorthand` is removed from `context` if it exists so that it is + not passed to child `to_dict` function calls. + """ + # Prevent that pandas categorical data is automatically sorted + # when a non-ordinal data type is specifed manually + # or if the encoding channel does not support sorting + if "sort" in parsed_shorthand and ( + "sort" not in kwds or kwds["type"] not in {"ordinal", Undefined} + ): + parsed_shorthand.pop("sort") + + kwds.update( + (k, v) + for k, v in parsed_shorthand.items() + if kwds.get(k, Undefined) is Undefined + ) + return kwds + + TSchemaBase = TypeVar("TSchemaBase", bound=SchemaBase) _CopyImpl = TypeVar("_CopyImpl", SchemaBase, Dict[Any, Any], List[Any])