executablebooks · chrisjsewell · May 31, 2023 · Mar 15, 2023 · Mar 15, 2023 · Mar 25, 2023
diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md
@@ -0,0 +1,41 @@
+# OSS-Fuzz integration
+
+In principle, core Markdown parsing is designed to never raise an exception or crash on any input,
+and so [fuzzing](https://en.wikipedia.org/wiki/Fuzzing) can be used to test this conformance.
+This folder contains fuzzers which are principally run downstream as part of the <https://github.com/google/oss-fuzz> infrastructure.
+
+Any file that matches `fuzz_*.py` in this repository will be built and run on OSS-Fuzz
+(see <https://github.com/google/oss-fuzz/blob/master/projects/markdown-it-py/build.sh>).
+
+See <https://google.github.io/oss-fuzz/advanced-topics/ideal-integration> for full details.
+
+## CI integration
+
+Fuzzing essentially runs forever, or until a crash is found, therefore it cannot be fully integrated into local continuous integration testing.
+The workflow in `.github/workflows/fuzz.yml` though runs a brief fuzzing session on code changed in a PR,
+which can be used to provide early warning on code changes.
+
+## Reproducing crash failures
+
+If OSS-Fuzz (or the CI workflow) identifies a crash, it will produce a "minimized testcase" file
+(e.g. <https://oss-fuzz.com/testcase-detail/5424112454729728>).
+
+To reproduce this crash locally, the easiest way is to run the [tox](https://tox.wiki/) environment, provided in this repository, against the test file (see `tox.ini`):
+
+```
+tox -e fuzz path/to/testcase
+```
+
+This idempotently sets up a local python environment with markdown-it-py (local dev) and [Atheris](https://pypi.org/project/atheris/) installed,
+clones <https://github.com/google/oss-fuzz> into it,
+and builds the fuzzers.
+Then the testcase is run within this environment.
+
+If you wish to simply run the full fuzzing process,
+you can activate this environment, then run e.g.:
+
+```
+python .tox/fuzz/oss-fuzz/infra/helper.py run_fuzzer markdown-it-py fuzz_markdown
+```
+
+For a more thorough guide on reproducing, see: <https://google.github.io/oss-fuzz/advanced-topics/reproducing/>
diff --git a/tests/fuzz/fuzz_markdown.py b/tests/fuzz/fuzz_markdown.py
@@ -0,0 +1,23 @@
+import sys
+
+import atheris
+
+from markdown_it import MarkdownIt
+
+
+def TestOneInput(data):  # per-input callback that main() passes to atheris.Setup
+    fdp = atheris.FuzzedDataProvider(data)  # wraps the raw fuzz input so typed values can be drawn from it
+    md = MarkdownIt()  # a new parser instance is built for every input
+    raw_markdown = fdp.ConsumeUnicodeNoSurrogates(sys.maxsize)  # sys.maxsize as the length: presumably "use all remaining input" - confirm in Atheris docs
+    md.parse(raw_markdown)
+    md.render(raw_markdown)  # no except clause here: any exception from parse/render propagates to the caller
+
+
+def main():  # script entry point, called from the __main__ guard below
+    atheris.instrument_all()  # runs before Setup/Fuzz; presumably enables coverage instrumentation - see Atheris docs
+    atheris.Setup(sys.argv, TestOneInput)  # passes the command-line arguments and the per-input callback
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":  # only start fuzzing when run as a script, not on import
+    main()
diff --git a/tests/fuzz/fuzz_markdown_extended.py b/tests/fuzz/fuzz_markdown_extended.py
@@ -0,0 +1,53 @@
+import sys
+
+import atheris
+
+# Beautified from auto-generated fuzzer at:
+# https://github.com/ossf/fuzz-introspector/pull/872#issuecomment-1450847118
+# Auto-fuzz heuristics used: py-autofuzz-heuristics-4.1
+# Imports by the generated code
+import markdown_it
+
+
+def TestOneInput(data):  # per-input callback that main() passes to atheris.Setup
+    fdp = atheris.FuzzedDataProvider(data)  # wraps the raw fuzz input so typed values can be drawn from it
+    val_1 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024))  # val_1 and val_2: length argument comes from ConsumeIntInRange(0, 1024)
+    val_2 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024))
+    val_3 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256))  # val_3 through val_10: length argument comes from ConsumeIntInRange(0, 256)
+    val_4 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256))
+    val_5 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256))
+    val_6 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256))
+    val_7 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256))
+    val_8 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256))
+    val_9 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256))
+    val_10 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256))
+
+    try:  # one try around the whole sequence: the first caught exception skips the remaining calls
+        c1 = markdown_it.main.MarkdownIt()  # a new instance is built for every input
+        c1.render(val_1)
+        c1.parse(val_2)
+        c1.renderInline(val_3)
+        c1.parseInline(val_4)
+        c1.normalizeLink(val_5)
+        c1.normalizeLinkText(val_6)
+        c1.disable(val_7)  # disable/enable/configure are called on the same instance as the calls around them
+        c1.enable(val_8)
+        c1.validateLink(val_9)
+        c1.configure(val_10)
+    except (
+        ValueError,
+        KeyError,
+        TypeError,
+    ):
+        # Exceptions thrown by the hit code; ignored here, while any other exception type propagates.
+        pass
+
+
+def main():  # script entry point, called from the __main__ guard below
+    atheris.instrument_all()  # runs before Setup/Fuzz; presumably enables coverage instrumentation - see Atheris docs
+    atheris.Setup(sys.argv, TestOneInput)  # passes the command-line arguments and the per-input callback
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":  # only start fuzzing when run as a script, not on import
+    main()