Skip to content

Commit

Permalink
Parse multiple executions
Browse files Browse the repository at this point in the history
  • Loading branch information
TheChymera committed Sep 19, 2023
1 parent 7a8d157 commit 8276006
Showing 1 changed file with 32 additions and 19 deletions.
51 changes: 32 additions & 19 deletions paper/source/prepare/paperdiff_parse.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,42 @@
import csv
import os
import re

# Directory that holds the paperdiff log files.
DATADIR = '../data'

# Pattern for one per-page report line of a paperdiff log; it captures the
# page number as printed in the log and the number of differing pixels.
PARSER = (
    '^page (?P<page>[0-9]+) has '
    '(?P<pixels>[0-9]+) pixels that differ$'
)

# Table that ends up in the CSV, seeded with its header row.
PARSED_LIST = [
    [
        'Page',
        'Pixels Differ [N]',
        'Differing Pixels Proportion',
        'Date',
        'Environment',
    ],
]


# Total number of pixels on one rasterized page; used as the denominator for
# the differing-pixels proportion.
# Ideally this would be determined dynamically/empirically, e.g. by diffing
# against a blank page of a colour that is certain not to appear in the document.
# For now it relies on two assumptions that could change: the diff generation
# script explicitly rasterizes at 300dpi, and the output is a4 paper.
# An a4 page at 300dpi is 2480 x 3508 pixels, i.e. 8699840 pixels in total.
pixels_total = 2480 * 3508

# Pattern for one per-page report line of the paperdiff log; it captures the
# page number as printed in the log and the number of differing pixels.
parser = (
    "^page (?P<page>[0-9]+) has "
    "(?P<pixels>[0-9]+) pixels that differ$"
)

# Table that ends up in the CSV, seeded with its header row.
parsed_list = [["Page", "Pixels Differ [N]", "Differing Pixels Proportion"]]
# Read the single paperdiff log and add one table row per per-page report line.
with open("../data/paperdiff.log") as log_file:
    for raw_line in log_file:
        print(raw_line)
        hit = re.match(parser, raw_line)
        if hit is None:
            # Anything that is not a per-page difference report is ignored.
            continue
        fields = hit.groupdict()
        # Share of the page that differs, kept to five decimal places.
        proportion = float(format(int(fields["pixels"]) / pixels_total, ".5f"))
        # The log's page number is one less than the numbering used in the
        # document itself, hence the +1.
        document_page = int(fields["page"]) + 1
        parsed_list.append([document_page, fields["pixels"], proportion])
# Parse every paperdiff log in DATADIR and append one row per reported page to
# PARSED_LIST, tagging each row with the date and environment encoded in the
# log's file name (`paperdiff_<environment>_<date>.<extension>`).
#
# The file-name pattern is a raw string (so `\.` is not an invalid escape
# sequence) and is compiled once, outside the loop.
log_name_pattern = re.compile(r'^paperdiff_(?P<environment>.+)_(?P<date>.+)\..*$')

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of the resulting CSV reproducible between runs.
for i in sorted(os.listdir(DATADIR)):
    if not i.endswith('log'):
        continue
    name_match = log_name_pattern.match(i)
    if name_match is None:
        # A log whose name lacks the environment/date parts (e.g. a plain
        # `paperdiff.log`) would otherwise raise AttributeError on
        # `None.groupdict()` and abort the whole run; skip it visibly instead.
        print(f'Skipping log file `{i}`: name does not match `paperdiff_<environment>_<date>.*`.')
        continue
    print(f'Processing paperdiff log file `{i}`...')
    execution_metadata = name_match.groupdict()
    with open(os.path.join(DATADIR, i)) as f:
        for line in f:
            line_match = re.match(PARSER, line)
            if line_match is None:
                # Not a per-page difference report; nothing to record.
                continue
            m = line_match.groupdict()
            # Share of the page that differs, kept to five decimal places.
            pixels_differ_proportion = int(m['pixels']) / pixels_total
            pixels_differ_proportion = float('{:.5f}'.format(pixels_differ_proportion))
            # Adding 1 to the page number to obtain the same numbering as in the document itself.
            PARSED_LIST.append([
                int(m['page']) + 1,
                m['pixels'],
                pixels_differ_proportion,
                execution_metadata['date'],
                execution_metadata['environment'],
            ])

# Write the accumulated table to ../data/paperdiff.csv via csv.writer.
# NOTE(review): this span appears to be a rendered diff hunk whose +/- markers
# and indentation were stripped, so the two `with open(...)` lines and the two
# `writerows(...)` lines look like before/after alternatives rather than
# consecutive statements. Confirm against the repository before running.
# Presumably the pre-commit form (double-quoted), matching the `parsed_list` table name.
with open("../data/paperdiff.csv", "w", newline="") as f:
# Presumably the post-commit form (single-quoted), matching the `PARSED_LIST` table name.
with open('../data/paperdiff.csv', 'w', newline='') as f:
writer = csv.writer(f)
# Writes all rows of the table named `parsed_list` (header row first).
writer.writerows(parsed_list)
# Writes all rows of the table named `PARSED_LIST` (header row first).
writer.writerows(PARSED_LIST)

0 comments on commit 8276006

Please sign in to comment.