forked from FutureDays/hashmove
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhashchecker.py
executable file
·171 lines (144 loc) · 6.22 KB
/
hashchecker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/local/bin/python3
#hashchecker.py
#######################################################################################################################
###REQUIRED LIBRARIES####
###simple_salesforce
#######################################################################################################################
import hashlib
import os
import time
import re
import argparse
import config
from simple_salesforce import Salesforce
def makeFileList(sourceList,hashalg):
    '''
    returns a list of sidecar checksum files for hashchecker to process.

    sourceList: paths given on the command line; each may be a file or a directory
    hashalg: name of the hash algorithm (e.g. "md5"); only files whose name ends
             with this string are treated as sidecar checksum files

    the return value is a list of dictionaries, one per sidecar file, with the
    keys Filepath, Filename, Barcode, SFHash, SidecarHash and Result. Everything
    except Filepath and Filename starts out as an empty string.
    '''
    flist = []
    for s in sourceList:
        if os.path.isdir(s):
            #walk recursively through the dirtree; os.path.join copes with or without a trailing separator on s
            for dirs, subdirs, files in os.walk(s):
                for x in files:
                    if x.endswith(hashalg): #look only for sidecar checksum files
                        flist.append(_newHashRecord(os.path.join(dirs, x)))
        elif os.path.isfile(s):
            #a single file is used as given
            if os.path.basename(s).endswith(hashalg):
                flist.append(_newHashRecord(s))
            else:
                print("WARNING: Input file " + os.path.basename(s) + " ignored because it's not a hash file")
        else:
            #skip missing paths here rather than crashing later when the sidecar is opened
            print("WARNING: Input " + s + " ignored because it is not an existing file or directory")
    return flist


def _newHashRecord(filepath):
    '''
    builds the dictionary that tracks one sidecar checksum file through the run
    '''
    return {
        "Filepath": filepath,
        "Filename": os.path.basename(filepath),
        "Barcode": "",
        "SFHash": "",
        "SidecarHash": "",
        "Result": "",
    }
def processList(sf,dictList,hashlength):
    '''
    fills in Barcode, SFHash, SidecarHash and Result for every entry of dictList.

    sf: Salesforce connection handed through to querySF
    dictList: list of file dictionaries as built by makeFileList
    hashlength: length of the expected hash, handed through to readHash

    returns the same dictList object that was passed in.
    '''
    for record in dictList:
        record = getBarcode(record)
        sfResponse = querySF(sf, record["Barcode"])
        record["SFHash"] = getChecksumFromRecord(sfResponse)
        #get the checksum from the sidecar file
        record["SidecarHash"] = readHash(record.get("Filepath"), hashlength)
        #Result is True only when the Salesforce hash and the sidecar hash are equal
        record["Result"] = bool(record["SFHash"] == record["SidecarHash"])
    return dictList
def getBarcode(dict):
    '''
    pulls the barcode out of the file name and stores it in the dictionary.

    dict: file dictionary with a "Filename" key (the parameter name shadows the
          builtin, it is kept so existing callers keep working)

    the barcode is characters 4 through 10 of the file name and must be exactly
    7 digits. When it is, dict["Barcode"] is set to it. When it is not, an error
    is printed and dict["Barcode"] is left untouched.

    returns the same dictionary that was passed in.
    '''
    filename = dict.get("Filename") or ""
    barcode = filename[4:11] #get the barcode from the filename
    #check the whole slice at once so a short or partly numeric slice is rejected
    if len(barcode) == 7 and barcode.isdigit():
        dict["Barcode"] = barcode
    else:
        print("ERROR: File Barcode Not Found for " + filename)
    return dict
def readHash(hashFile, hashlength):
    '''
    reads the stored hash out of a sidecar checksum file.

    hashFile: path of the sidecar file
    hashlength: number of characters in the hash, as a string or an int

    returns the first run of hashlength word characters found in the file.
    raises ValueError when the file contains no such run.
    '''
    #raw string so \w reaches the regex engine as written; str() lets callers pass an int length
    pattern = r'\w{' + str(hashlength) + '}'
    with open(hashFile,'r') as f: #open it
        found = re.search(pattern, f.read()) #get the hash
    if found is None:
        raise ValueError("No " + str(hashlength) + " character hash found in " + str(hashFile))
    return found.group()
def querySF(sf,barcode):
    '''
    asks Salesforce for the messageDigest__c of the Preservation_Object__c whose Name is the barcode.

    sf: object with a query() method that accepts a SOQL string
    barcode: value to match against Name

    returns whatever sf.query() returns.
    '''
    #the barcode comes from a file name, so escape backslashes and then single quotes
    #to keep it from ending the quoted SOQL literal early
    safeBarcode = barcode.replace("\\", "\\\\").replace("'", "\\'")
    result = sf.query("SELECT messageDigest__c FROM Preservation_Object__c WHERE Name = '" + safeBarcode + "'")
    return result
def getChecksumFromRecord(sfData):
    '''
    pulls the checksum out of a query result.

    sfData: mapping with a "records" list, as returned by querySF

    returns the messageDigest__c value of the first record, or None when the
    result holds no records (or the first record has no messageDigest__c).
    '''
    records = sfData.get("records") or []
    if not records:
        #no matching record; hand back None instead of raising IndexError
        return None
    checksum = records[0].get("messageDigest__c")
    return checksum
def processResults(dictList):
    '''
    prints a summary of the run: how many checksums were processed, how many
    succeeded, how many failed, and the file names of the failures.

    dictList: list of file dictionaries whose "Result" has been filled in
    '''
    failedNames = []
    passed = 0
    total = 0
    for total, entry in enumerate(dictList, start=1):
        if not entry["Result"]:
            failedNames.append(entry["Filename"])
            continue
        passed += 1
    print("\n")
    print("Number of Checksums Processed: " + str(total))
    print("Number of Successes: " + str(passed))
    print("Number of Failures: " + str(len(failedNames)))
    if failedNames:
        print("\n")
        print("List of Failed Files:")
        #one failed file name per line
        print("\n".join(failedNames))
    print("\n")
def initLog(sourceList,destination,hashalg):
    '''
    initializes log file

    sourceList: source paths, one "From:" line is written for each
    destination: directory that holds LoadingScript.log, also written as the "To:" line
    hashalg: name of the hash algorithm, written as the "Hash algorithm:" line

    the header is appended to LoadingScript.log, so earlier runs are kept.
    '''
    logPath = os.path.join(destination, "LoadingScript.log")
    #the with block closes the file even if one of the writes fails
    with open(logPath, "a") as txtFile:
        txtFile.write("Load and Verify Script Started at: " + time.strftime("%Y-%m-%d_%H:%M:%S") + "\n")
        for f in sourceList:
            txtFile.write("From: " + f + "\n")
        txtFile.write("To: " + destination + "\n")
        txtFile.write("Hash algorithm: " + hashalg + "\n")
        txtFile.write("\n\n")
def logNewLine(text,destination):
    '''
    appends text to LoadingScript.log in destination on a new line, prefixed
    with the current time and ": "
    '''
    logPath = os.path.join(destination, "LoadingScript.log")
    #the with block closes the file handle, which was previously left open
    with open(logPath, "a") as txtFile:
        txtFile.write("\n" + time.strftime("%Y-%m-%d_%H:%M:%S") + ": " + text)
def logSameLine(text,destination):
    '''
    appends text to LoadingScript.log in destination exactly as given, with
    no newline and no timestamp
    '''
    logPath = os.path.join(destination, "LoadingScript.log")
    #the with block closes the file handle, which was previously left open
    with open(logPath, "a") as txtFile:
        txtFile.write(text)
def make_args():
    '''
    initialize arguments from the cli

    -a / --algorithm: hashing algorithm, one of md5, sha1, sha256, sha512
                      (default md5), stored on the result as "a"
    sourceObj: one or more files or directories, stored as a list

    returns the argparse namespace parsed from the command line.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-a', '--algorithm',
        action='store',
        dest='a',
        default='md5',
        choices=['md5', 'sha1', 'sha256', 'sha512'],
        help="the hashing algorithm to use",
    )
    parser.add_argument(
        'sourceObj',
        nargs='+',
        help="As many files or directories as you would like processed. Only sidecar checksum files are processed.",
    )
    return parser.parse_args()
def main():
    '''
    do the thing

    parses the cli arguments, logs in to Salesforce with the credentials from
    config, collects the sidecar checksum files, compares each sidecar hash
    with the hash stored in Salesforce and prints a summary.
    '''
    #init args from cli; argparse already insists on at least one sourceObj (nargs='+')
    args = make_args()
    #init salesforce login#
    sf = Salesforce(username=config.username,password=config.password,security_token=config.security_token)
    #NOTE: file logging through initLog is currently switched off
    #number of hex characters in a digest of each supported algorithm
    hashlengths = {'md5':'32','sha1':'40','sha256':'64','sha512':'128'}
    hashlength = hashlengths[args.a] #set value for comparison later
    #create list of dictionaries (which represent hash files) to be processed
    dictList = makeFileList(args.sourceObj,args.a)
    #process the list
    dictList = processList(sf,dictList,hashlength)
    #tally up the success and failures, print the failed files.
    processResults(dictList)


#only run when executed as a script, so importing this module has no side effects
if __name__ == "__main__":
    main()