This repository has been archived by the owner on Oct 24, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
diff_main.py
executable file
·148 lines (110 loc) · 4.32 KB
/
diff_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python3
import argparse
import json
import pickle
import re
import sys
from logging import basicConfig, INFO, getLogger
from os import path, listdir
from multi_access.dump.dump import from_file
# Module-level logger used by this script; main() calls basicConfig to set the
# log format, output stream and level.
logger = getLogger("makeradmin")
class SettableWrapper(object):
    """Really ugly wrapper for making objects hashable and comparable so that they can be converted to 'set'.

    Hashing uses ``str(obj)``, equality compares the wrapped objects and
    ordering compares ``repr`` strings. Attributes that are not found on the
    wrapper itself are looked up on the wrapped object.

    NOTE: hash and equality only agree when equal wrapped objects also
    produce equal ``str`` output.
    """

    def __init__(self, obj):
        """Store *obj* after checking that ``repr(obj)`` works.

        Raises:
            Exception: whatever ``repr(obj)`` raised, re-raised after logging.
        """
        try:
            repr(obj)
        except Exception:
            # Do not interpolate ``obj`` itself: its repr just failed, so
            # formatting it may raise again and hide the original error.
            logger.error(
                "Could not 'repr' the object of type %s, and thus not hash it",
                type(obj).__name__,
            )
            raise
        self.obj = obj

    def __getattr__(self, attr):
        """Forward attribute lookups that failed on the wrapper to the wrapped object."""
        # ``obj`` is missing while an instance is being rebuilt (copy, pickle,
        # ``__new__`` without ``__init__``). Without this guard the lookup of
        # ``self.obj`` below would re-enter ``__getattr__`` until RecursionError.
        if attr == "obj":
            raise AttributeError(attr)
        return getattr(self.obj, attr)

    def __hash__(self):
        """Hash the string form of the wrapped object."""
        return hash(str(self))

    def assert_arguments_are_class_instances(func):
        """Decorator asserting that every positional argument is a SettableWrapper.

        Retained for backward compatibility only. The comparison methods of
        this class return ``NotImplemented`` for foreign types instead of
        asserting, which is what Python's comparison protocol expects.
        """
        def wrapper(*args):
            for arg in args:
                assert isinstance(arg, SettableWrapper), "Can only compare 'SettableWrapper' objects"
            return func(*args)
        return wrapper

    def __lt__(self, other):
        """Order two wrappers by the ``repr`` of their wrapped objects."""
        if not isinstance(other, SettableWrapper):
            return NotImplemented
        return repr(self) < repr(other)

    def __eq__(self, other):
        """Two wrappers are equal when their wrapped objects are equal."""
        if not isinstance(other, SettableWrapper):
            return NotImplemented
        return self.obj == other.obj

    def __repr__(self):
        """Return the ``repr`` of the wrapped object."""
        return repr(self.obj)

    def __str__(self):
        """Return the ``str`` of the wrapped object."""
        return str(self.obj)
def monkeypatch_settable(obj):
    """Return *obj* unchanged if it is hashable, else wrapped in SettableWrapper.

    Only the ``TypeError`` raised by ``hash()`` for unhashable objects
    triggers wrapping. Any other exception, including one raised while
    constructing the wrapper, propagates to the caller instead of being
    swallowed by a ``return`` inside ``finally``.
    """
    try:
        hash(obj)
    except TypeError:
        return SettableWrapper(obj)
    return obj
def monkeypatch_settable_for_list(list_obj):
    """Return a new list with each element of *list_obj* passed through monkeypatch_settable.

    Element order is preserved.
    """
    return list(map(monkeypatch_settable, list_obj))
def print_diff(old, new):
    """Print the rows added and removed per table between two dumps, then a summary.

    Both *old* and *new* are mappings of table name to a mapping holding a
    'columns' entry and a 'rows' list. Rows are compared as sets, so the order
    of rows within a table is ignored.

    Raises:
        AssertionError: if the dumps have different tables, a table has
            different columns, or a table contains duplicate rows.
    """
    # Sanity checking tables and columns should be same.
    assert set(old.keys()) == set(new.keys()), "dumps do not contain the same tables"

    tables = sorted(old.keys())
    table_summary = {}
    for table in tables:
        old_table = old[table]
        new_table = new[table]
        assert old_table['columns'] == new_table['columns'], f"columns differ for table {table}"

        # Check for duplicates or algorithm won't work: a set would silently
        # collapse them. Each set is built once and reused for the diff.
        old_rows = monkeypatch_settable_for_list(old_table['rows'])
        old_set = set(old_rows)
        assert len(old_rows) == len(old_set), f"duplicate rows in old table {table}"

        new_rows = monkeypatch_settable_for_list(new_table['rows'])
        new_set = set(new_rows)
        assert len(new_rows) == len(new_set), f"duplicate rows in new table {table}"

        # Check for added and removed rows.
        added = new_set - old_set
        removed = old_set - new_set
        if added or removed:
            print(f"table {table} diffing, columns {old_table['columns']}")
            # Interleave additions and deletions, ordered by the row itself.
            diff = sorted([('ADD', d) for d in added] + [('DEL', d) for d in removed], key=lambda x: x[1])
            for what, d in diff:
                print(f" {what}: {d!r}")
        table_summary[table] = {"added": added, "removed": removed}

    print("\nDiff summary:")
    for table in tables:
        n_added = len(table_summary[table]["added"])
        n_removed = len(table_summary[table]["removed"])
        # Only tables that actually changed get a summary line.
        if n_added > 0 or n_removed > 0:
            print(f"{table:>17s} (+{n_added}, -{n_removed}): {n_added*'+'}{n_removed*'-'}")
def main():
    """Diff consecutive dump files found in a directory.

    Files named ``dump-<number>.<extension>`` in ``--in-dir`` are loaded with
    ``from_file`` in ascending numeric order, and each one is diffed against
    the previous one with ``print_diff``. Logging goes to stderr.
    """
    basicConfig(format='%(asctime)s %(levelname)s [%(pathname)s:%(lineno)d]: %(message)s',
                stream=sys.stderr, level=INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--in-dir", default='.',
                        help="Dir where diff files are stored.")
    parser.add_argument("-e", "--extension", default='pkl',
                        help="Format pkl or json.")
    args = parser.parse_args()

    directory = args.in_dir

    # The dot and the extension are matched literally and the whole name must
    # match, so e.g. 'dump-1.pkl.bak' or 'dump-1xpkl' are not picked up.
    dump_name = re.compile(r'dump-(\d+)\.' + re.escape(args.extension))

    def dump_index(filename):
        """Return the number in a dump file name, or None if it is not a dump file."""
        if not path.isfile(path.join(directory, filename)):
            return None
        m = dump_name.fullmatch(filename)
        if not m:
            return None
        return int(m.group(1))

    # Test against None rather than truthiness so that 'dump-0.<ext>' is kept.
    indexed = []
    for name in listdir(directory):
        index = dump_index(name)
        if index is not None:
            indexed.append((index, name))

    # Order by number first so that dump-2 comes before dump-10; the name
    # breaks ties such as 'dump-1' versus 'dump-01'.
    filenames = [path.join(directory, name) for _, name in sorted(indexed)]

    prev_dump = None
    for filename in filenames:
        logger.info(f'loading dump file {filename}')
        next_dump = from_file(filename)
        # Nothing to diff against on the first file. An empty (falsy) previous
        # dump is skipped as well.
        if prev_dump:
            logger.info('diffing to previous dump')
            print_diff(prev_dump, next_dump)
        prev_dump = next_dump


if __name__ == '__main__':
    main()