-
Notifications
You must be signed in to change notification settings - Fork 1
/
directory_merkel_tree.py
217 lines (164 loc) · 6.93 KB
/
directory_merkel_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import os
from os import path
from os import listdir
from os.path import isdir, isfile
from hashlib import sha256
import base64
empty_directory_hash = sha256('empty directory').digest()
def make_dmt(root_directory=os.getcwd(), nonce='', encrypter=None):
if not isdir(root_directory):
raise IOError('The root directory supplied, \'{}\', is not in fact a directory.'.format(root_directory))
directory_contents = listdir(root_directory)
if not directory_contents:
return DirectoryMerkelTree(dmt_hash=empty_directory_hash, children=None)
children = dict()
for filesystem_item in directory_contents:
item_path = path.join(root_directory, filesystem_item)
if isfile(item_path):
filename = filesystem_item
file_path = path.join(root_directory, filename)
with open(file_path, 'r') as f:
file_contents = f.read()
if encrypter:
filename = encrypter.encrypt_filename(filename)
file_contents = encrypter.encrypt(file_contents)
file_hash = sha256(file_contents)
if nonce:
file_hash.update(nonce)
dmt_child = DirectoryMerkelTree(dmt_hash=file_hash.digest(), children=None)
children[filename] = dmt_child
elif isdir(item_path):
subdir_name = filesystem_item
subdir_path = path.join(root_directory, subdir_name)
dmt_subtree = make_dmt(subdir_path, nonce, encrypter)
if encrypter:
subdir_name = encrypter.encrypt_filename(subdir_name)
# Append a slash to facilitate detection of new empty folders upon comparison.
subdir_name += '/'
children[subdir_name] = dmt_subtree
# Item was neither file nor directory...
else:
raise IOError('Item \'{}\' is neither a file nor directory.'.format(item_path))
# Compile all child hashes to compute this tree's hash.
tree_hash = sha256()
for child in children.values():
tree_hash.update(child.dmt_hash)
dmt_tree = DirectoryMerkelTree(dmt_hash=tree_hash.digest(), children=children)
return dmt_tree
def print_tree(tree):
if not tree:
print None
return
if tree.children:
print 'Directory hash = {}'.format(base64.urlsafe_b64encode(tree.dmt_hash))
print 'Contents:'
for name, subtree in tree.children.iteritems():
print
print name
print_tree(subtree)
else:
print 'File hash = {}'.format(base64.urlsafe_b64encode(tree.dmt_hash))
def compute_tree_changes(dmt_new, dmt_old, directory_path=''):
updated, new, deleted = set(), set(), set()
# Base cases:
# Both files or empty directories
if (not dmt_new.children) and (not dmt_old.children):
return updated, new, deleted
# New directory
elif not dmt_old.children:
mutual_filesystem_items = set()
new_filesystem_items = set(dmt_new.children.keys())
deleted_filesystem_items = set()
elif not dmt_new.children:
mutual_filesystem_items = set()
new_filesystem_items = set()
deleted_filesystem_items = set(dmt_old.children.keys())
else:
mutual_filesystem_items = set(dmt_new.children.keys()).intersection(set(dmt_old.children.keys()))
new_filesystem_items = set(dmt_new.children.keys()).difference(set(dmt_old.children.keys()))
deleted_filesystem_items = set(dmt_old.children.keys()).difference(set(dmt_new.children.keys()))
# Compile the set of updated files and directories, as well as any other changes within subdirectories.
for filesystem_item in mutual_filesystem_items:
# Always check subdirectories for e.g file renamings.
if filesystem_item[-1] == '/':
subdir_name = filesystem_item
subdir_path = directory_path + subdir_name
subdir_updated, subdir_new, subdir_deleted = \
compute_tree_changes(dmt_new.children[subdir_name], dmt_old.children[subdir_name], subdir_path)
# Mark the subdirectory if necessary.
if (dmt_old.children[subdir_name].dmt_hash != dmt_new.children[subdir_name].dmt_hash) or \
subdir_updated or subdir_new or subdir_deleted:
updated.add(subdir_path)
# Incorporate differences from within.
updated.update(subdir_updated)
new.update(subdir_new)
deleted.update(subdir_deleted)
# File with differing hash values.
elif dmt_old.children[filesystem_item].dmt_hash != dmt_new.children[filesystem_item].dmt_hash:
filename = filesystem_item
file_path = directory_path + filename
updated.add(file_path)
# Compile the set of newly created files.
for filesystem_item in new_filesystem_items:
item_path = directory_path + filesystem_item
new.add(item_path)
new.update(get_all_paths(dmt_new.children[filesystem_item], directory_path))
# Compile the set of deleted files.
for filesystem_item in deleted_filesystem_items:
item_path = directory_path + filesystem_item
deleted.add(item_path)
deleted.update(get_all_paths(dmt_old.children[filesystem_item], directory_path))
return updated, new, deleted
def get_all_paths(dmt, directory_path=''):
# Base case.
if not dmt.children:
return set()
filesystem_items = set()
for item in dmt.children.keys():
filesystem_items.add(directory_path+item)
# Also get the paths of subdirectory contents.
if item[-1] == '/':
subdir_name = item
subdir_path = directory_path + subdir_name
filesystem_items.add(subdir_path)
filesystem_items.update(get_all_paths(dmt.children[subdir_name], subdir_path))
return filesystem_items
class DirectoryMerkelTree:
def __init__(self, dmt_hash, children):
self.dmt_hash = dmt_hash
self.children = children
def __eq__(self, other):
if not other:
return False
if type(other) is not type(self):
raise TypeError('{} is not equal to {}'.format(type(self), type(other)))
updated, new, deleted = compute_tree_changes(self, other)
if updated or new or deleted:
return False
else:
return True
def __ne__(self, other):
return not (self == other)
def compare_trees(self):
None
def get_updated_tree(self):
None
if __name__ == '__main__':
tree = make_dmt(os.path.join(os.getcwd(), 'personal/'))
print get_all_paths(tree)
print_tree(tree)
tree_a = make_dmt(os.path.join(os.getcwd(), 'testA/'))
tree_b = make_dmt(os.path.join(os.getcwd(), 'testB/'))
assert tree_a != tree_b
changes_a_b = compute_tree_changes(tree_a, tree_b)
changes_b_a = compute_tree_changes(tree_b, tree_a)
print changes_a_b
print changes_b_a
nonce = 'a testing nonce'
tree_a_nonced = make_dmt(os.path.join(os.getcwd(), 'testA/'), nonce=nonce)
tree_b_nonced = make_dmt(os.path.join(os.getcwd(), 'testB/'), nonce=nonce)
assert tree_a_nonced == make_dmt(os.path.join(os.getcwd(), 'testA/'), nonce=nonce)
assert tree_a != tree_a_nonced
changes_a_b_nonced = compute_tree_changes(tree_a_nonced, tree_b_nonced)
assert changes_a_b_nonced == changes_a_b
print 'All tests passed!'