From 379c047fb9127e7aff54ecf8fb6a9c71c7000670 Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Fri, 14 Jun 2024 14:16:31 -0400
Subject: [PATCH 1/3] Add a YAML-parsing benchmark

---
 .../benchmarks/bm_yaml/pyproject.toml         | 10 ++
 .../benchmarks/bm_yaml/requirements.txt       |  1 +
 .../benchmarks/bm_yaml/run_benchmark.py       | 91 +++++++++++++++++++
 3 files changed, 102 insertions(+)
 create mode 100644 pyperformance/data-files/benchmarks/bm_yaml/pyproject.toml
 create mode 100644 pyperformance/data-files/benchmarks/bm_yaml/requirements.txt
 create mode 100644 pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py

diff --git a/pyperformance/data-files/benchmarks/bm_yaml/pyproject.toml b/pyperformance/data-files/benchmarks/bm_yaml/pyproject.toml
new file mode 100644
index 00000000..e8d30625
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_yaml/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "pyperformance_bm_yaml"
+requires-python = ">=3.8"
+dependencies = ["pyperf"]
+urls = {repository = "https://github.com/python/pyperformance"}
+dynamic = ["version"]
+
+[tool.pyperformance]
+name = "yaml"
+tags = "serialize"
diff --git a/pyperformance/data-files/benchmarks/bm_yaml/requirements.txt b/pyperformance/data-files/benchmarks/bm_yaml/requirements.txt
new file mode 100644
index 00000000..cf39afa6
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_yaml/requirements.txt
@@ -0,0 +1 @@
+pyyaml==6.0.1
diff --git a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py
new file mode 100644
index 00000000..4f12e204
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py
@@ -0,0 +1,91 @@
+"""
+Script for testing the performance of YAML parsing, using yaml.
+
+This will dump/load several real world-representative objects a few thousand
+times. The methodology below was chosen for was chosen to be similar to
+real-world scenarios which operate on single objects at a time.
+
+This explicitly tests the pure Python implementation in pyyaml, not its C
+extension.
+
+The object structure is copied from the `json_loads` benchmark.
+"""
+
+
+import random
+import sys
+
+
+import pyperf
+import yaml
+
+
+DICT = {
+    'ads_flags': 0,
+    'age': 18,
+    'bulletin_count': 0,
+    'comment_count': 0,
+    'country': 'BR',
+    'encrypted_id': 'G9urXXAJwjE',
+    'favorite_count': 9,
+    'first_name': '',
+    'flags': 412317970704,
+    'friend_count': 0,
+    'gender': 'm',
+    'gender_for_display': 'Male',
+    'id': 302935349,
+    'is_custom_profile_icon': 0,
+    'last_name': '',
+    'locale_preference': 'pt_BR',
+    'member': 0,
+    'tags': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
+    'profile_foo_id': 827119638,
+    'secure_encrypted_id': 'Z_xxx2dYx3t4YAdnmfgyKw',
+    'session_number': 2,
+    'signup_id': '201-19225-223',
+    'status': 'A',
+    'theme': 1,
+    'time_created': 1225237014,
+    'time_updated': 1233134493,
+    'unread_message_count': 0,
+    'user_group': '0',
+    'username': 'collinwinter',
+    'play_count': 9,
+    'view_count': 7,
+    'zip': ''}
+
+TUPLE = (
+    [265867233, 265868503, 265252341, 265243910, 265879514,
+     266219766, 266021701, 265843726, 265592821, 265246784,
+     265853180, 45526486, 265463699, 265848143, 265863062,
+     265392591, 265877490, 265823665, 265828884, 265753032], 60)
+
+
+def mutate_dict(orig_dict, random_source):
+    new_dict = dict(orig_dict)
+    for key, value in new_dict.items():
+        rand_val = random_source.random() * sys.maxsize
+        if isinstance(key, (int, bytes, str)):
+            new_dict[key] = type(key)(rand_val)
+    return new_dict
+
+
+random_source = random.Random(5)  # Fixed seed.
+DICT_GROUP = [mutate_dict(DICT, random_source) for _ in range(3)]
+
+
+def bench_yaml(objs):
+    for obj in objs:
+        yaml.load(obj, Loader=yaml.Loader)
+
+
+if __name__ == "__main__":
+    runner = pyperf.Runner()
+    runner.metadata['description'] = "Benchmark json.loads()"
+
+    yaml_dict = yaml.dump(DICT)
+    yaml_tuple = yaml.dump(TUPLE)
+    yaml_dict_group = yaml.dump(DICT_GROUP)
+    objs = (yaml_dict, yaml_tuple, yaml_dict_group)
+
+    runner.bench_func('yaml', bench_yaml, objs, inner_loops=20)

From e3c590f3da5b0d1622b5e61f8c38b348e7f0e1d3 Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Fri, 14 Jun 2024 14:28:55 -0400
Subject: [PATCH 2/3] Fix description

---
 pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py
index 4f12e204..316c5930 100644
--- a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py
+++ b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py
@@ -81,7 +81,7 @@ def bench_yaml(objs):
 
 if __name__ == "__main__":
     runner = pyperf.Runner()
-    runner.metadata['description'] = "Benchmark json.loads()"
+    runner.metadata['description'] = "Benchmark yaml.load()"
 
     yaml_dict = yaml.dump(DICT)
     yaml_tuple = yaml.dump(TUPLE)

From b267db00423acda93999f3cb19161975355c486b Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Mon, 24 Jun 2024 15:11:40 -0400
Subject: [PATCH 3/3] Update
 pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
---
 pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py
index 316c5930..371fe6f9 100644
--- a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py
+++ b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py
@@ -2,7 +2,7 @@
 Script for testing the performance of YAML parsing, using yaml.
 
 This will dump/load several real world-representative objects a few thousand
-times. The methodology below was chosen for was chosen to be similar to
+times. The methodology below was chosen to be similar to
 real-world scenarios which operate on single objects at a time.
 
 This explicitly tests the pure Python implementation in pyyaml, not its C