24
24
gdown , has_gdown = optional_import ("gdown" , "3.6" )
25
25
26
26
27
def check_hash(filepath: str, val: Optional[str] = None, hash_type: str = "md5") -> bool:
    """
    Verify hash signature of specified file.

    Args:
        filepath: path of source file to verify hash value.
        val: expected hash value of the file, as a hexadecimal digest string.
            The comparison is case-insensitive. If None, the check is skipped
            and the file is reported as valid.
        hash_type: 'md5' or 'sha1' (case-insensitive), defaults to 'md5'.

    Returns:
        True when ``val`` is None or when the digest of the file matches ``val``;
        False when the digests differ or the file could not be read.

    Raises:
        NotImplementedError: When ``hash_type`` is neither 'md5' nor 'sha1'
            (only checked when ``val`` is not None).

    """
    if val is None:
        print(f"Expected {hash_type} is None, skip {hash_type} check for file {filepath}.")
        return True

    hash_name = hash_type.lower()
    if hash_name == "md5":
        actual_hash = hashlib.md5()
    elif hash_name == "sha1":
        actual_hash = hashlib.sha1()
    else:
        raise NotImplementedError(f"Unknown 'hash_type' {hash_type}.")

    try:
        with open(filepath, "rb") as f:
            # Read in 1 MiB chunks so large files are never loaded into memory at once.
            for chunk in iter(lambda: f.read(1024 * 1024), b""):
                actual_hash.update(chunk)
    except Exception as e:
        # Best-effort check: an unreadable file is reported as a failed verification.
        print(f"Exception in check_hash: {e}")
        return False

    # hexdigest() always returns lowercase hex, so normalise the expected value;
    # otherwise a correct checksum written in uppercase would be rejected.
    if val.lower() != actual_hash.hexdigest():
        print("check_hash failed.")
        return False

    print(f"Verified '{os.path.basename(filepath)}', {hash_type}: {val}.")
    return True
51
59
52
60
53
- def download_url (url : str , filepath : str , md5_value : Optional [str ] = None ) -> None :
61
+ def download_url (url : str , filepath : str , hash_val : Optional [str ] = None , hash_type : str = "md5" ) -> None :
54
62
"""
55
- Download file from specified URL link, support process bar and MD5 check.
63
+ Download file from specified URL link, support process bar and hash check.
56
64
57
65
Args:
58
66
url: source URL link to download file.
59
67
filepath: target filepath to save the downloaded file.
60
- md5_value: expected MD5 value to validate the downloaded file.
61
- if None, skip MD5 validation.
68
+ hash_val: expected hash value to validate the downloaded file.
69
+ if None, skip hash validation.
70
+ hash_type: 'md5' or 'sha1', defaults to 'md5'.
62
71
63
72
Raises:
64
- RuntimeError: When the MD5 validation of the ``filepath`` existing file fails.
73
+ RuntimeError: When the hash validation of the ``filepath`` existing file fails.
65
74
RuntimeError: When a network issue or denied permission prevents the
66
75
file download from ``url`` to ``filepath``.
67
76
URLError: See urllib.request.urlretrieve.
68
77
HTTPError: See urllib.request.urlretrieve.
69
78
ContentTooShortError: See urllib.request.urlretrieve.
70
79
IOError: See urllib.request.urlretrieve.
71
- RuntimeError: When the MD5 validation of the ``url`` downloaded file fails.
80
+ RuntimeError: When the hash validation of the ``url`` downloaded file fails.
72
81
73
82
"""
74
83
if os .path .exists (filepath ):
75
- if not check_md5 (filepath , md5_value ):
76
- raise RuntimeError (f"MD5 check of existing file failed: filepath={ filepath } , expected MD5={ md5_value } ." )
84
+ if not check_hash (filepath , hash_val , hash_type ):
85
+ raise RuntimeError (
86
+ f"{ hash_type } check of existing file failed: filepath={ filepath } , expected { hash_type } ={ hash_val } ."
87
+ )
77
88
print (f"file { filepath } exists, skip downloading." )
78
89
return
79
90
@@ -110,8 +121,8 @@ def download_url(url: str, filepath: str, md5_value: Optional[str] = None) -> No
110
121
logging .debug ("IO Error - %s" % e )
111
122
finally :
112
123
if file_size == os .path .getsize (tmp_file_path ):
113
- if md5_value and not check_md5 (tmp_file_path , md5_value ):
114
- raise Exception ("Error validating the file against its MD5 hash" )
124
+ if hash_val and not check_hash (tmp_file_path , hash_val , hash_type ):
125
+ raise Exception (f "Error validating the file against its { hash_type } hash" )
115
126
shutil .move (tmp_file_path , filepath )
116
127
elif file_size == - 1 :
117
128
raise Exception ("Error getting Content-Length from server: %s" % url )
@@ -128,34 +139,38 @@ def _process_hook(blocknum: int, blocksize: int, totalsize: int):
128
139
print (f"download failed from { url } to { filepath } ." )
129
140
raise e
130
141
131
- if not check_md5 (filepath , md5_value ):
142
+ if not check_hash (filepath , hash_val , hash_type ):
132
143
raise RuntimeError (
133
- f"MD5 check of downloaded file failed: URL={ url } , filepath={ filepath } , expected MD5={ md5_value } ."
144
+ f"{ hash_type } check of downloaded file failed: URL={ url } , "
145
+ f"filepath={ filepath } , expected { hash_type } ={ hash_val } ."
134
146
)
135
147
136
148
137
- def extractall (filepath : str , output_dir : str , md5_value : Optional [str ] = None ) -> None :
149
+ def extractall (filepath : str , output_dir : str , hash_val : Optional [str ] = None , hash_type : str = "md5" ) -> None :
138
150
"""
139
151
Extract file to the output directory.
140
152
Expected file types are: `zip`, `tar.gz` and `tar`.
141
153
142
154
Args:
143
155
filepath: the file path of compressed file.
144
156
output_dir: target directory to save extracted files.
145
- md5_value: expected MD5 value to validate the compressed file.
146
- if None, skip MD5 validation.
157
+ hash_val: expected hash value to validate the compressed file.
158
+ if None, skip hash validation.
159
+ hash_type: 'md5' or 'sha1', defaults to 'md5'.
147
160
148
161
Raises:
149
- RuntimeError: When the MD5 validation of the ``filepath`` compressed file fails.
162
+ RuntimeError: When the hash validation of the ``filepath`` compressed file fails.
150
163
ValueError: When the ``filepath`` file extension is not one of [zip", "tar.gz", "tar"].
151
164
152
165
"""
153
166
target_file = os .path .join (output_dir , os .path .basename (filepath ).split ("." )[0 ])
154
167
if os .path .exists (target_file ):
155
168
print (f"extracted file { target_file } exists, skip extracting." )
156
169
return
157
- if not check_md5 (filepath , md5_value ):
158
- raise RuntimeError (f"MD5 check of compressed file failed: filepath={ filepath } , expected MD5={ md5_value } ." )
170
+ if not check_hash (filepath , hash_val , hash_type ):
171
+ raise RuntimeError (
172
+ f"{ hash_type } check of compressed file failed: " f"filepath={ filepath } , expected { hash_type } ={ hash_val } ."
173
+ )
159
174
160
175
if filepath .endswith ("zip" ):
161
176
zip_file = zipfile .ZipFile (filepath )
@@ -169,7 +184,9 @@ def extractall(filepath: str, output_dir: str, md5_value: Optional[str] = None)
169
184
raise ValueError ('Unsupported file extension, available options are: ["zip", "tar.gz", "tar"].' )
170
185
171
186
172
def download_and_extract(
    url: str, filepath: str, output_dir: str, hash_val: Optional[str] = None, hash_type: str = "md5"
) -> None:
    """
    Fetch a compressed file from a URL, then unpack it into the output directory.

    Args:
        url: source URL link to download file.
        filepath: the file path of compressed file.
        output_dir: target directory to save extracted files.
        hash_val: expected hash value to validate the downloaded file.
            if None, skip hash validation.
        hash_type: 'md5' or 'sha1', defaults to 'md5'.

    """
    # Both steps receive the same expected hash, so bundle the options once.
    hash_options = {"hash_val": hash_val, "hash_type": hash_type}
    download_url(url=url, filepath=filepath, **hash_options)
    extractall(filepath=filepath, output_dir=output_dir, **hash_options)
0 commit comments