From 379c047fb9127e7aff54ecf8fb6a9c71c7000670 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 14 Jun 2024 14:16:31 -0400 Subject: [PATCH 1/3] Add a YAML-parsing benchmark --- .../benchmarks/bm_yaml/pyproject.toml | 10 ++ .../benchmarks/bm_yaml/requirements.txt | 1 + .../benchmarks/bm_yaml/run_benchmark.py | 91 +++++++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 pyperformance/data-files/benchmarks/bm_yaml/pyproject.toml create mode 100644 pyperformance/data-files/benchmarks/bm_yaml/requirements.txt create mode 100644 pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py diff --git a/pyperformance/data-files/benchmarks/bm_yaml/pyproject.toml b/pyperformance/data-files/benchmarks/bm_yaml/pyproject.toml new file mode 100644 index 00000000..e8d30625 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_yaml/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "pyperformance_bm_yaml" +requires-python = ">=3.8" +dependencies = ["pyperf"] +urls = {repository = "https://github.com/python/pyperformance"} +dynamic = ["version"] + +[tool.pyperformance] +name = "yaml" +tags = "serialize" diff --git a/pyperformance/data-files/benchmarks/bm_yaml/requirements.txt b/pyperformance/data-files/benchmarks/bm_yaml/requirements.txt new file mode 100644 index 00000000..cf39afa6 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_yaml/requirements.txt @@ -0,0 +1 @@ +pyyaml==6.0.1 diff --git a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py new file mode 100644 index 00000000..4f12e204 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py @@ -0,0 +1,91 @@ +""" +Script for testing the performance of YAML parsing, using yaml. + +This will dump/load several real world-representative objects a few thousand +times. The methodology below was chosen for was chosen to be similar to +real-world scenarios which operate on single objects at a time. + +This explicitly tests the pure Python implementation in pyyaml, not its C +extension. + +The object structure is copied from the `json_load` benchmark. +""" + + +import random +import sys + + +import pyperf +import yaml + + +DICT = { + 'ads_flags': 0, + 'age': 18, + 'bulletin_count': 0, + 'comment_count': 0, + 'country': 'BR', + 'encrypted_id': 'G9urXXAJwjE', + 'favorite_count': 9, + 'first_name': '', + 'flags': 412317970704, + 'friend_count': 0, + 'gender': 'm', + 'gender_for_display': 'Male', + 'id': 302935349, + 'is_custom_profile_icon': 0, + 'last_name': '', + 'locale_preference': 'pt_BR', + 'member': 0, + 'tags': ['a', 'b', 'c', 'd', 'e', 'f', 'g'], + 'profile_foo_id': 827119638, + 'secure_encrypted_id': 'Z_xxx2dYx3t4YAdnmfgyKw', + 'session_number': 2, + 'signup_id': '201-19225-223', + 'status': 'A', + 'theme': 1, + 'time_created': 1225237014, + 'time_updated': 1233134493, + 'unread_message_count': 0, + 'user_group': '0', + 'username': 'collinwinter', + 'play_count': 9, + 'view_count': 7, + 'zip': ''} + +TUPLE = ( + [265867233, 265868503, 265252341, 265243910, 265879514, + 266219766, 266021701, 265843726, 265592821, 265246784, + 265853180, 45526486, 265463699, 265848143, 265863062, + 265392591, 265877490, 265823665, 265828884, 265753032], 60) + + +def mutate_dict(orig_dict, random_source): + new_dict = dict(orig_dict) + for key, value in new_dict.items(): + rand_val = random_source.random() * sys.maxsize + if isinstance(key, (int, bytes, str)): + new_dict[key] = type(key)(rand_val) + return new_dict + + +random_source = random.Random(5) # Fixed seed. +DICT_GROUP = [mutate_dict(DICT, random_source) for _ in range(3)] + + +def bench_yaml(objs): + for obj in objs: + yaml.load(obj, Loader=yaml.Loader) + + +if __name__ == "__main__": + runner = pyperf.Runner() + runner.metadata['description'] = "Benchmark json.loads()" + + yaml_dict = yaml.dump(DICT) + yaml_tuple = yaml.dump(TUPLE) + yaml_dict_group = yaml.dump(DICT_GROUP) + objs = (yaml_dict, yaml_tuple, yaml_dict_group) + + runner.bench_func('yaml', bench_yaml, objs, inner_loops=20) From e3c590f3da5b0d1622b5e61f8c38b348e7f0e1d3 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 14 Jun 2024 14:28:55 -0400 Subject: [PATCH 2/3] Fix description --- pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py index 4f12e204..316c5930 100644 --- a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py +++ b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py @@ -81,7 +81,7 @@ def bench_yaml(objs): if __name__ == "__main__": runner = pyperf.Runner() - runner.metadata['description'] = "Benchmark json.loads()" + runner.metadata['description'] = "Benchmark yaml.load()" yaml_dict = yaml.dump(DICT) yaml_tuple = yaml.dump(TUPLE) From b267db00423acda93999f3cb19161975355c486b Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 24 Jun 2024 15:11:40 -0400 Subject: [PATCH 3/3] Update pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py Co-authored-by: Eric Snow --- pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py index 316c5930..371fe6f9 100644 --- a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py +++ b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py @@ -2,7 +2,7 @@ Script for testing the performance of YAML parsing, using yaml. This will dump/load several real world-representative objects a few thousand -times. The methodology below was chosen for was chosen to be similar to +times. The methodology below was chosen to be similar to real-world scenarios which operate on single objects at a time. This explicitly tests the pure Python implementation in pyyaml, not its C