-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7a8d157
commit 8276006
Showing
1 changed file
with
32 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,42 @@ | ||
import csv | ||
import os | ||
import re | ||
|
||
DATADIR='../data' | ||
PARSER = '^page (?P<page>[0-9]+) has (?P<pixels>[0-9]+) pixels that differ$' | ||
PARSED_LIST = [ | ||
['Page', 'Pixels Differ [N]', 'Differing Pixels Proportion', 'Date', 'Environment'], | ||
] | ||
|
||
|
||
# This should be determined dynamically/empirically, perhaps by diffing against a blank page of a colour that is certain not to appear in the document. | ||
# For now we rely on the fact that the diff generation script explicitly rasterizes at 300 dpi (this could change), | ||
# and that the output is A4 paper (this could also change): 2480 x 3508 pixels = 8699840 pixels per page. | ||
pixels_total = 8699840 | ||
|
||
parser = "^page (?P<page>[0-9]+) has (?P<pixels>[0-9]+) pixels that differ$" | ||
|
||
parsed_list = [ | ||
["Page", "Pixels Differ [N]", "Differing Pixels Proportion"], | ||
] | ||
with open("../data/paperdiff.log") as f: | ||
for line in f: | ||
print(line) | ||
try: | ||
m = re.match(parser, line).groupdict() | ||
except AttributeError: | ||
continue | ||
pixels_differ_proportion = int(m["pixels"]) / pixels_total | ||
pixels_differ_proportion = float("{:.5f}".format(pixels_differ_proportion)) | ||
#pixels_differ_percent = pixels_differ_proportion * 100 | ||
# Adding 1 to the page number to obtain the same numbering as in the document itself. | ||
parsed_list.append([int(m["page"])+1, m["pixels"], pixels_differ_proportion]) | ||
# Parse every paperdiff log found in DATADIR and append one row per reported
# page to PARSED_LIST.
#
# Log files are expected to be named `paperdiff_<environment>_<date>.<ext>`;
# the environment and date taken from the file name are attached to every row
# parsed from that file.  Lines inside a log that do not match PARSER are
# ignored.
#
# The directory listing is sorted because os.listdir() returns entries in
# arbitrary order, which would otherwise make the CSV row order vary per run.
for i in sorted(os.listdir(DATADIR)):
    # Require a real `.log` extension; comparing only the last three
    # characters would also accept names such as `catalog`.
    if not i.endswith('.log'):
        continue
    # Raw string: `\.` inside a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    name_match = re.match(r'^paperdiff_(?P<environment>.+)_(?P<date>.+)\..*$', i)
    if name_match is None:
        # A stray log file must not abort processing of the remaining ones.
        print(f'Skipping `{i}`: file name does not match `paperdiff_<environment>_<date>.log`.')
        continue
    print(f'Processing paperdiff log file `{i}`...')
    execution_metadata = name_match.groupdict()
    with open(os.path.join(DATADIR, i)) as f:
        for line in f:
            line_match = re.match(PARSER, line)
            if line_match is None:
                # Not a "page N has M pixels that differ" line.
                continue
            m = line_match.groupdict()
            pixels_differ_proportion = int(m['pixels']) / pixels_total
            # Keep five decimal places in the CSV output.
            pixels_differ_proportion = float(f'{pixels_differ_proportion:.5f}')
            PARSED_LIST.append([
                # Adding 1 to the page number to obtain the same numbering
                # as in the document itself.
                int(m['page']) + 1,
                m['pixels'],
                pixels_differ_proportion,
                execution_metadata['date'],
                execution_metadata['environment'],
            ])
|
||
with open("../data/paperdiff.csv", "w", newline="") as f: | ||
with open('../data/paperdiff.csv', 'w', newline='') as f: | ||
writer = csv.writer(f) | ||
writer.writerows(parsed_list) | ||
writer.writerows(PARSED_LIST) |