Add benchmark suite for compilation (#542)

lapp0 · web-flow · commit d534c2f494db · 2024-01-25T12:36:57.000+01:00
diff --git a/docs/community/contribute.md b/docs/community/contribute.md
@@ -56,6 +56,16 @@ And run the code style checks:
 pre-commit run --all-files
 ```
 
+#### Performance testing
+
+Run benchmark tests:
+
+```bash
+pytest --benchmark-only
+```
+
+See the [pytest-benchmark command-line options](https://pytest-benchmark.readthedocs.io/en/latest/usage.html#commandline-options) for other ways to run the benchmarks.
+
 ### Open a Pull Request
 
 Create a new branch on your fork, commit and push the changes:
diff --git a/pyproject.toml b/pyproject.toml
@@ -46,6 +46,7 @@ dynamic = ["version"]
 test = [
     "pre-commit",
     "pytest",
+    "pytest-benchmark",
     "pytest-cov",
     "pytest-mock",
     "transformers",
diff --git a/tests/benchmark/conftest.py b/tests/benchmark/conftest.py
@@ -0,0 +1,15 @@
+import pytest
+
+from outlines.fsm.fsm import RegexFSM
+from outlines.models.transformers import TransformerTokenizer
+
+
+@pytest.fixture
+def tokenizer():
+    return TransformerTokenizer("gpt2")  # no scope is given, so pytest rebuilds this for every test that requests it
+
+
+@pytest.fixture
+def ensure_numba_compiled(tokenizer):
+    RegexFSM("a", tokenizer)  # build a trivial FSM first; per the fixture name this presumably warms up numba compilation (TODO confirm)
+    return True  # placeholder value: the benchmark tests request this fixture but never read what it returns
diff --git a/tests/benchmark/test_benchmark_json_schema.py b/tests/benchmark/test_benchmark_json_schema.py
@@ -0,0 +1,92 @@
+import pytest
+
+import outlines
+
+outlines.disable_cache()  # runs before the outlines.fsm imports below, which is why they suppress E402; presumably keeps cached results out of the timings
+
+from outlines.fsm.fsm import RegexFSM  # noqa: E402
+from outlines.fsm.json_schema import build_regex_from_object  # noqa: E402
+
+simple_schema = """{
+        "$defs": {
+            "Armor": {
+                "enum": ["leather", "chainmail", "plate"],
+                "title": "Armor",
+                "type": "string"
+            }
+        },
+        "properties": {
+            "name": {"maxLength": 10, "title": "Name", "type": "string"},
+            "age": {"title": "Age", "type": "integer"},
+            "armor": {"$ref": "#/$defs/Armor"},
+            "strength": {"title": "Strength", "type": "integer"}\
+        },
+        "required": ["name", "age", "armor", "strength"],
+        "title": "Character",
+        "type": "object"
+    }"""  # NOTE: the backslash after the "strength" entry only joins two lines of this non-raw string; the JSON is unchanged
+
+
+complex_schema = """{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "title": "Schema for a recording",
+  "type": "object",
+  "definitions": {
+    "artist": {
+      "type": "object",
+      "properties": {
+        "id": {"type": "number"},
+        "name": {"type": "string"},
+        "functions": {
+          "type": "array",
+          "items": {"type": "string"}
+        }
+      },
+      "required": ["id", "name", "functions"]
+    }
+  },
+  "properties": {
+    "id": {"type": "number"},
+    "work": {
+      "type": "object",
+      "properties": {
+        "id": {"type": "number"},
+        "name": {"type": "string"},
+        "composer": {"$ref": "#/definitions/artist"}
+      }
+    },
+    "recording_artists": {
+      "type": "array",
+      "items": {"$ref": "#/definitions/artist"}
+    }
+  },
+  "required": ["id", "work", "recording_artists"]
+}"""  # nested objects and arrays; the "artist" definition is reused through $ref in two places
+
+
+schemas = dict(simple_schema=simple_schema, complex_schema=complex_schema)  # the tests parametrize over these keys and look the schema text up by name
+
+
+@pytest.mark.parametrize("schema_name", schemas.keys())
+def test_benchmark_json_schema_to_regex(benchmark, ensure_numba_compiled, schema_name):
+    """Benchmark converting a JSON schema string into a regex."""
+    schema = schemas[schema_name]
+    benchmark.pedantic(
+        build_regex_from_object,
+        args=(schema,),
+        rounds=8,  # pedantic mode: run exactly 8 rounds instead of letting pytest-benchmark calibrate
+    )
+
+
+@pytest.mark.parametrize("schema_name", schemas.keys())
+def test_benchmark_json_schema_to_fsm(
+    benchmark, tokenizer, ensure_numba_compiled, schema_name
+):
+    """Benchmark compiling the regex built from a JSON schema into a RegexFSM."""
+    schema = schemas[schema_name]
+    regex = build_regex_from_object(schema)  # done before the benchmarked call, so only RegexFSM construction is timed
+    benchmark.pedantic(
+        RegexFSM,
+        args=(regex, tokenizer),
+        rounds=8,  # pedantic mode: run exactly 8 rounds instead of letting pytest-benchmark calibrate
+    )
diff --git a/tests/benchmark/test_benchmark_numba_compile.py b/tests/benchmark/test_benchmark_numba_compile.py
@@ -0,0 +1,33 @@
+import importlib
+
+import interegular
+import numba
+
+import outlines
+
+outlines.disable_cache()  # runs at import time, before any test here; presumably keeps cached results out of the timings
+
+
+def test_benchmark_compile_numba(benchmark, tokenizer, mocker):
+    """Compile a basic regex to benchmark the numba compilation time"""
+
+    def setup():  # passed to benchmark.pedantic below, which calls it before each round to obtain (args, kwargs)
+        from outlines.fsm import regex
+
+        original_njit = numba.njit
+
+        def mock_njit(*args, **kwargs):
+            kwargs["cache"] = False  # override any cache setting so numba does not reuse cached compilation results
+            return original_njit(*args, **kwargs)
+
+        mocker.patch("numba.njit", new=mock_njit)
+        importlib.reload(regex)  # re-execute the module after patching so it picks up the wrapped numba.njit
+
+        regex_pattern, _ = regex.make_deterministic_fsm(
+            interegular.parse_pattern("a").to_fsm().reduce()
+        )
+        return (regex, regex_pattern, tokenizer), {}  # (args, kwargs) handed to the benchmarked lambda
+
+    benchmark.pedantic(
+        lambda r, *args: r.create_fsm_index_tokenizer(*args), rounds=2, setup=setup
+    )
diff --git a/tests/benchmark/test_benchmark_regex_fsm.py b/tests/benchmark/test_benchmark_regex_fsm.py
@@ -0,0 +1,32 @@
+import pytest
+
+import outlines
+
+outlines.disable_cache()  # runs before the outlines.fsm import below, which is why it suppresses E402; presumably keeps cached results out of the timings
+
+from outlines.fsm.fsm import RegexFSM  # noqa: E402
+
+regex_samples = {  # name -> pattern; the keys drive the parametrization of the benchmark below
+    "email": r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
+    "complex_phone": "\\+?\\d{1,4}?[-.\\s]?\\(?\\d{1,3}?\\)?[-.\\s]?\\d{1,4}[-.\\s]?\\d{1,4}[-.\\s]?\\d{1,9}",
+    "simple_phone": "\\+?[1-9][0-9]{7,14}",
+    "date": r"([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])(\.|-|/)([1-9]|0[1-9]|1[0-2])(\.|-|/)([0-9][0-9]|19[0-9][0-9]|20[0-9][0-9])|([0-9][0-9]|19[0-9][0-9]|20[0-9][0-9])(\.|-|/)([1-9]|0[1-9]|1[0-2])(\.|-|/)([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])",
+    "time": r"(0?[1-9]|1[0-2]):[0-5]\d\s?(am|pm)?",
+    "ip": r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)",
+    "url": r"(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?",
+    "ssn": r"\d{3}-\d{2}-\d{4}",
+    "complex_span_constrained_relation_extraction": "(['\"\\ ,]?((?:of|resulting|case|which|cultures|a|core|extreme|selflessness|spiritual|various|However|both|vary|in|other|secular|the|religious|among|moral|and|It|object|worldviews|altruism|traditional|material|aspect|or|life|beings|virtue|is|however|opposite|concern|an|practice|it|for|s|quality|religions|In|Altruism|animals|happiness|many|become|principle|human|selfishness|may|synonym)['\"\\ ,]?)+['\"\\ ,]?\\s\\|\\s([^|\\(\\)\n]{1,})\\s\\|\\s['\"\\ ,]?((?:of|resulting|case|which|cultures|a|core|extreme|selflessness|spiritual|various|However|both|vary|in|other|secular|the|religious|among|moral|and|It|object|worldviews|altruism|traditional|material|aspect|or|life|beings|virtue|is|however|opposite|concern|an|practice|it|for|s|quality|religions|In|Altruism|animals|happiness|many|become|principle|human|selfishness|may|synonym)['\"\\ ,]?)+['\"\\ ,]?(\\s\\|\\s\\(([^|\\(\\)\n]{1,})\\s\\|\\s([^|\\(\\)\n]{1,})\\))*\\n)*",
+}
+
+
+@pytest.mark.parametrize("regex_name", regex_samples.keys())
+def test_benchmark_regex_to_fsm(
+    benchmark, tokenizer, ensure_numba_compiled, regex_name
+):
+    """Benchmark building a RegexFSM from each sample regex."""
+    regex_str = regex_samples[regex_name]
+    benchmark.pedantic(
+        RegexFSM,
+        args=(regex_str, tokenizer),
+        rounds=8,  # pedantic mode: run exactly 8 rounds instead of letting pytest-benchmark calibrate
+    )