Skip to content

Commit 0fdd819

Browse files
committed
Add some parser benchmarks
These tests are based on two things: some from WPT, and some based on Anolis
1 parent d49afd3 commit 0fdd819

36 files changed

+8220
-0
lines changed

Diff for: benchmarks/bench_html.py

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import io
2+
import os
3+
import sys
4+
5+
import pyperf
6+
7+
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")]
8+
import html5lib # noqa: E402
9+
10+
11+
def bench_parse(fh, treebuilder):
    """Parse the whole of ``fh`` once with html5lib.

    :arg fh: seekable file-like object holding the HTML input; it is
        rewound to offset 0 first so every call reads the same data
    :arg treebuilder: tree builder name forwarded to ``html5lib.parse``

    The parsed document is discarded; only the work of parsing matters.
    """
    # Rewind: the same file object is reused across calls.
    fh.seek(0)
    # Encoding detection via chardet is switched off for the parse.
    html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)
14+
15+
16+
def bench_serialize(loops, fh, treebuilder):
    """Time ``loops`` serializations of the document parsed from ``fh``.

    The input is parsed exactly once, before the timer starts, so only the
    ``html5lib.serialize`` calls fall inside the timed region.

    :arg loops: number of times to serialize the document
    :arg fh: seekable file-like object holding the HTML input; it is
        rewound to offset 0 before parsing
    :arg treebuilder: tree builder name, passed to ``html5lib.parse`` as
        ``treebuilder`` and to ``html5lib.serialize`` as ``tree``

    :returns: the difference between two ``pyperf.perf_counter()`` readings
        taken immediately before and after the serialization loop
    """
    fh.seek(0)
    doc = html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)

    range_it = range(loops)
    t0 = pyperf.perf_counter()

    # Throwaway loop variable: the original rebound the ``loops`` parameter
    # here, which hid the requested iteration count for the rest of the body.
    for _ in range_it:
        html5lib.serialize(doc, tree=treebuilder, encoding="ascii", inject_meta_charset=False)

    return pyperf.perf_counter() - t0
27+
28+
29+
# Names of the benchmark groups this script can run; also the set of values
# accepted by the optional positional "benchmark" command-line argument.
BENCHMARKS = ["parse", "serialize"]
30+
31+
32+
def add_cmdline_args(cmd, args):
    """Append the selected benchmark name, if there is one, to ``cmd``.

    Handed to ``pyperf.Runner`` as its ``add_cmdline_args`` callback.

    :arg cmd: list of command-line arguments, extended in place
    :arg args: parsed arguments object with a ``benchmark`` attribute
    """
    selected = args.benchmark
    if not selected:
        # No benchmark was chosen: leave the command line untouched.
        return
    cmd.append(selected)
35+
36+
37+
if __name__ == "__main__":
    # Script entry point: register the parse and/or serialize benchmarks
    # with a pyperf runner, once per tree builder.
    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
    runner.metadata["description"] = "Run benchmarks based on Anolis"
    runner.argparser.add_argument("benchmark", nargs="?", choices=BENCHMARKS)

    args = runner.parse_args()
    # Run just the requested group, or every group when none was named.
    benchmarks = (args.benchmark,) if args.benchmark else BENCHMARKS

    # Load the input once into memory so the benchmarks never touch disk.
    data_path = os.path.join(os.path.dirname(__file__), "data", "html.html")
    with open(data_path, "rb") as fh:
        source = io.BytesIO(fh.read())

    treebuilders = ("etree", "dom", "lxml")

    if "parse" in benchmarks:
        for tb in treebuilders:
            runner.bench_func("html_parse_%s" % tb, bench_parse, source, tb)

    if "serialize" in benchmarks:
        for tb in treebuilders:
            runner.bench_time_func("html_serialize_%s" % tb, bench_serialize, source, tb)

Diff for: benchmarks/bench_wpt.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import io
2+
import os
3+
import sys
4+
5+
import pyperf
6+
7+
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")]
8+
import html5lib # noqa: E402
9+
10+
11+
def bench_html5lib(fh):
    """Parse the whole of ``fh`` once with html5lib's "etree" tree builder.

    :arg fh: seekable file-like object holding the HTML input; it is
        rewound to offset 0 first so every call reads the same data

    The parsed document is discarded; only the work of parsing matters.
    """
    # Rewind: the same file object is reused across calls.
    fh.seek(0)
    # Encoding detection via chardet is switched off for the parse.
    html5lib.parse(fh, treebuilder="etree", useChardet=False)
14+
15+
16+
def add_cmdline_args(cmd, args):
    """Append the selected benchmark name, if there is one, to ``cmd``.

    Handed to ``pyperf.Runner`` as its ``add_cmdline_args`` callback.

    :arg cmd: list of command-line arguments, extended in place
    :arg args: parsed arguments object with a ``benchmark`` attribute
    """
    chosen = args.benchmark
    if not chosen:
        # No benchmark was chosen: leave the command line untouched.
        return
    cmd.append(chosen)
19+
20+
21+
# Maps benchmark name (the file name with its ".html" suffix removed) to the
# file's path, for every ".html" file found anywhere under the "data/wpt"
# directory that sits next to this script.
BENCHMARKS = {}
for root, dirs, files in os.walk(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "wpt")
):
    for f in files:
        if not f.endswith(".html"):
            continue
        BENCHMARKS[f[: -len(".html")]] = os.path.join(root, f)
26+
27+
28+
if __name__ == "__main__":
    # Script entry point: register one parse benchmark per discovered WPT
    # file (or only the one named on the command line) with a pyperf runner.
    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
    runner.metadata["description"] = "Run parser benchmarks from WPT"
    runner.argparser.add_argument("benchmark", nargs="?", choices=sorted(BENCHMARKS))

    args = runner.parse_args()
    # Run just the requested benchmark, or all of them in sorted name order.
    benchmarks = (args.benchmark,) if args.benchmark else sorted(BENCHMARKS)

    for bench in benchmarks:
        # Read the file into memory up front so the benchmark never
        # touches disk.
        with open(BENCHMARKS[bench], "rb") as fh:
            fh2 = io.BytesIO(fh.read())

        runner.bench_func("wpt_%s" % bench, bench_html5lib, fh2)

Diff for: benchmarks/data/README.md

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
The files in this data directory are derived from:
2+
3+
* `html.html`: from [html](http://github.com/whatwg/html), revision
4+
77db356a293f2b152b648c836b6989d17afe42bb. This is the first 5000 lines of `source`. (This is
5+
representative of the input to [Anolis](https://bitbucket.org/ms2ger/anolis/); first 5000 lines
6+
chosen to make it parse in a reasonable time.)
7+
8+
* `wpt`: see `wpt/README.md`.

0 commit comments

Comments
 (0)