From d446c2fe7e59c93928271a48475150193784a684 Mon Sep 17 00:00:00 2001 From: Cory Todd Date: Tue, 5 Sep 2023 14:05:50 -0700 Subject: [PATCH 1/2] introduce Serializer class We have never handled the JSONDecodeError so consumers are assumed to be catching that exception on their end. This means we cannot wrap all the parser errors into a unified type (e.g. ValueError) without breaking the API. Keep API compatibility on the loader functions by introducing a Serializer class that the cli tool can utilize. This allows the cli to avoid knowing which specific serializer errors to handle. The library itself maintains its current API. Signed-off-by: Cory Todd --- jsondiff/__init__.py | 51 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/jsondiff/__init__.py b/jsondiff/__init__.py index a984ad8..5e86701 100644 --- a/jsondiff/__init__.py +++ b/jsondiff/__init__.py @@ -4,6 +4,9 @@ import json import yaml +from json import JSONDecodeError +from yaml import YAMLError + from .symbols import * from .symbols import Symbol @@ -55,6 +58,10 @@ def __init__(self, **kwargs): self.kwargs = kwargs def __call__(self, src): + """Parse and return JSON data + :param src: str|file-like source + :return: dict parsed data + """ if isinstance(src, string_types): return json.loads(src, **self.kwargs) else: @@ -74,6 +81,47 @@ def __call__(self, src): """ return yaml.safe_load(src) +class Serializer: + """Serializer helper loads and stores object data + :param file_format: str json or yaml + :param indent: int Output indentation in spaces + :raise ValueError: file_path does not contains valid file_format data + """ + + def __init__(self, file_format, indent): + # pyyaml _can_ load json but is ~20 times slower and has known issues so use + # the json from stdlib when json is specified. + self.serializers = { + "json": (JsonLoader(), JsonDumper(indent=indent)), + "yaml": (YamlLoader(), YamlDumper(indent=indent)), + } + self.file_format = file_format + if file_format not in self.serializers: + raise ValueError(f"Unsupported serialization format {file_format}, expected one of {self.serializers.keys()}") + + def deserialize_file(self, src): + """Deserialize file from the specified format + :param file_path: str path to file + :param src: str|file-like source + :return dict + :raise ValueError: file_path does not contain valid file_format data + """ + loader, _ = self.serializers[self.file_format] + try: + parsed = loader(src) + except (JSONDecodeError, YAMLError) as ex: + raise ValueError(f"Invalid {self.file_format} file") from ex + return parsed + + def serialize_data(self, obj, stream): + """Serialize obj and write to stream + :param obj: dict to serialize + :param stream: Writeable stream + """ + _, dumper = self.serializers[self.file_format] + dumper(obj, stream) + + class JsonDiffSyntax(object): def emit_set_diff(self, a, b, s, added, removed): raise NotImplementedError() @@ -667,5 +715,6 @@ def similarity(a, b, cls=JsonDiffer, **kwargs): "JsonDumper", "JsonLoader", "YamlDumper", - "YamlLoader" + "YamlLoader", + "Serializer", ] From a33a3714227266287dc0186641c7daae4454ebbe Mon Sep 17 00:00:00 2001 From: Cory Todd Date: Tue, 5 Sep 2023 14:09:49 -0700 Subject: [PATCH 2/2] cli: handle malformed input files Do not show a stacktrace on the cli tool when an invalid file is specified. Instead, use the new Serializer class to abstract the load process and handle the unified ValueError. Also handle FileNotFound because that's a fairly normal user input scenario as well. Set return code on sys.exit to indicate abnormal exit. Fixes #71 Signed-off-by: Cory Todd --- jsondiff/cli.py | 66 +++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/jsondiff/cli.py b/jsondiff/cli.py index a6e1a12..dd066e4 100644 --- a/jsondiff/cli.py +++ b/jsondiff/cli.py @@ -2,6 +2,16 @@ import jsondiff import sys +def load_file(serializer, file_path): + with open(file_path, "r") as f: + parsed = None + try: + parsed = serializer.deserialize_file(f) + except ValueError: + print(f"{file_path} is not valid {serializer.file_format}") + except FileNotFoundError: + print(f"{file_path} does not exist") + return parsed def main(): parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -18,35 +28,33 @@ def main(): args = parser.parse_args() - # pyyaml _can_ load json but is ~20 times slower and has known issues so use - # the json from stdlib when json is specified. - serializers = { - "json": (jsondiff.JsonLoader(), jsondiff.JsonDumper(indent=args.indent)), - "yaml": (jsondiff.YamlLoader(), jsondiff.YamlDumper(indent=args.indent)), - } - loader, dumper = serializers[args.format] - - with open(args.first, "r") as f: - with open(args.second, "r") as g: - jf = loader(f) - jg = loader(g) - if args.patch: - x = jsondiff.patch( - jf, - jg, - marshal=True, - syntax=args.syntax - ) - else: - x = jsondiff.diff( - jf, - jg, - marshal=True, - syntax=args.syntax - ) - - dumper(x, sys.stdout) + serializer = jsondiff.Serializer(args.format, args.indent) + parsed_first = load_file(serializer, args.first) + parsed_second = load_file(serializer, args.second) + + if not (parsed_first and parsed_second): + return 1 + + if args.patch: + x = jsondiff.patch( + parsed_first, + parsed_second, + marshal=True, + syntax=args.syntax + ) + else: + x = jsondiff.diff( + parsed_first, + parsed_second, + marshal=True, + syntax=args.syntax + ) + + serializer.serialize_data(x, sys.stdout) + + return 0 if __name__ == '__main__': - main() + ret = main() + sys.exit(ret)