Skip to content

Commit 33114f0

Browse files
tianyizheng02 and github-actions authored Apr 1, 2023
Revamp md5.py (#8065)
* Add type hints to md5.py
* Rename some vars to snake case
* Specify functions imported from math
* Rename vars and functions to be more descriptive
* Make tests from test function into doctests
* Clarify more var names
* Refactor some MD5 code into preprocess function
* Simplify loop indices in get_block_words
* Add more detailed comments, docs, and doctests
* updating DIRECTORY.md
* updating DIRECTORY.md
* updating DIRECTORY.md
* updating DIRECTORY.md
* updating DIRECTORY.md
* Add type hints to md5.py
* Rename some vars to snake case
* Specify functions imported from math
* Rename vars and functions to be more descriptive
* Make tests from test function into doctests
* Clarify more var names
* Refactor some MD5 code into preprocess function
* Simplify loop indices in get_block_words
* Add more detailed comments, docs, and doctests
* updating DIRECTORY.md
* updating DIRECTORY.md
* updating DIRECTORY.md
* updating DIRECTORY.md
* Convert str types to bytes
* Add tests comparing md5_me to hashlib's md5
* Replace line-break backslashes with parentheses

---------

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
1 parent 56a40eb commit 33114f0

File tree

2 files changed

+290
-83
lines changed

2 files changed

+290
-83
lines changed
 

‎DIRECTORY.md

+1
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,7 @@
717717
* [Archimedes Principle](physics/archimedes_principle.py)
718718
* [Casimir Effect](physics/casimir_effect.py)
719719
* [Centripetal Force](physics/centripetal_force.py)
720+
* [Grahams Law](physics/grahams_law.py)
720721
* [Horizontal Projectile Motion](physics/horizontal_projectile_motion.py)
721722
* [Hubble Parameter](physics/hubble_parameter.py)
722723
* [Ideal Gas Law](physics/ideal_gas_law.py)

‎hashes/md5.py

+289-83
Original file line numberDiff line numberDiff line change
@@ -1,127 +1,338 @@
1-
import math
1+
"""
2+
The MD5 algorithm is a hash function that's commonly used as a checksum to
3+
detect data corruption. The algorithm works by processing a given message in
4+
blocks of 512 bits, padding the message as needed. It uses the blocks to operate on
5+
a 128-bit state and performs a total of 64 such operations per block. Note that all values
6+
are little-endian, so inputs are converted as needed.
27
8+
Although MD5 was used as a cryptographic hash function in the past, it's since
9+
been cracked, so it shouldn't be used for security purposes.
310
4-
def rearrange(bit_string_32):
5-
"""[summary]
6-
Regroups the given binary string.
11+
For more info, see https://en.wikipedia.org/wiki/MD5
12+
"""
13+
14+
from collections.abc import Generator
15+
from math import sin
16+
17+
18+
def to_little_endian(string_32: bytes) -> bytes:
19+
"""
20+
Converts the given string to little-endian in groups of 8 chars.
721
822
Arguments:
9-
bitString32 {[string]} -- [32 bit binary]
23+
string_32 {[string]} -- [32-char string]
1024
1125
Raises:
12-
ValueError -- [if the given string not are 32 bit binary string]
26+
ValueError -- [input is not 32 char]
1327
1428
Returns:
15-
[string] -- [32 bit binary string]
16-
>>> rearrange('1234567890abcdfghijklmnopqrstuvw')
17-
'pqrstuvwhijklmno90abcdfg12345678'
29+
32-char little-endian string
30+
>>> to_little_endian(b'1234567890abcdfghijklmnopqrstuvw')
31+
b'pqrstuvwhijklmno90abcdfg12345678'
32+
>>> to_little_endian(b'1234567890')
33+
Traceback (most recent call last):
34+
...
35+
ValueError: Input must be of length 32
1836
"""
37+
if len(string_32) != 32:
38+
raise ValueError("Input must be of length 32")
1939

20-
if len(bit_string_32) != 32:
21-
raise ValueError("Need length 32")
22-
new_string = ""
40+
little_endian = b""
2341
for i in [3, 2, 1, 0]:
24-
new_string += bit_string_32[8 * i : 8 * i + 8]
25-
return new_string
42+
little_endian += string_32[8 * i : 8 * i + 8]
43+
return little_endian
44+
45+
46+
def reformat_hex(i: int) -> bytes:
47+
"""
48+
Converts the given non-negative integer to hex string.
2649
50+
Example: Suppose the input is the following:
51+
i = 1234
2752
28-
def reformat_hex(i):
29-
"""[summary]
30-
Converts the given integer into 8-digit hex number.
53+
The input is 0x000004d2 in hex, so the little-endian hex string is
54+
"d2040000".
3155
3256
Arguments:
33-
i {[int]} -- [integer]
57+
i {[int]} -- [integer]
58+
59+
Raises:
60+
ValueError -- [input is negative]
61+
62+
Returns:
63+
8-char little-endian hex string
64+
65+
>>> reformat_hex(1234)
66+
b'd2040000'
3467
>>> reformat_hex(666)
35-
'9a020000'
68+
b'9a020000'
69+
>>> reformat_hex(0)
70+
b'00000000'
71+
>>> reformat_hex(1234567890)
72+
b'd2029649'
73+
>>> reformat_hex(1234567890987654321)
74+
b'b11c6cb1'
75+
>>> reformat_hex(-1)
76+
Traceback (most recent call last):
77+
...
78+
ValueError: Input must be non-negative
3679
"""
80+
if i < 0:
81+
raise ValueError("Input must be non-negative")
3782

38-
hexrep = format(i, "08x")
39-
thing = ""
83+
hex_rep = format(i, "08x")[-8:]
84+
little_endian_hex = b""
4085
for i in [3, 2, 1, 0]:
41-
thing += hexrep[2 * i : 2 * i + 2]
42-
return thing
86+
little_endian_hex += hex_rep[2 * i : 2 * i + 2].encode("utf-8")
87+
return little_endian_hex
4388

4489

45-
def pad(bit_string):
46-
"""[summary]
47-
Fills up the binary string to a 512 bit binary string
90+
def preprocess(message: bytes) -> bytes:
91+
"""
92+
Preprocesses the message string:
93+
- Convert message to bit string
94+
- Pad bit string to a multiple of 512 chars:
95+
- Append a 1
96+
- Append 0's until length = 448 (mod 512)
97+
- Append length of original message (64 chars)
98+
99+
Example: Suppose the input is the following:
100+
message = "a"
101+
102+
The message bit string is "01100001", which is 8 bits long. Thus, the
103+
bit string needs 439 bits of padding so that
104+
(bit_string + "1" + padding) = 448 (mod 512).
105+
The message length is "000010000...0" in 64-bit little-endian binary.
106+
The combined bit string is then 512 bits long.
48107
49108
Arguments:
50-
bitString {[string]} -- [binary string]
109+
message {[string]} -- [message string]
51110
52111
Returns:
53-
[string] -- [binary string]
112+
processed bit string padded to a multiple of 512 chars
113+
114+
>>> preprocess(b"a") == (b"01100001" + b"1" +
115+
... (b"0" * 439) + b"00001000" + (b"0" * 56))
116+
True
117+
>>> preprocess(b"") == b"1" + (b"0" * 447) + (b"0" * 64)
118+
True
54119
"""
55-
start_length = len(bit_string)
56-
bit_string += "1"
120+
bit_string = b""
121+
for char in message:
122+
bit_string += format(char, "08b").encode("utf-8")
123+
start_len = format(len(bit_string), "064b").encode("utf-8")
124+
125+
# Pad bit_string to a multiple of 512 chars
126+
bit_string += b"1"
57127
while len(bit_string) % 512 != 448:
58-
bit_string += "0"
59-
last_part = format(start_length, "064b")
60-
bit_string += rearrange(last_part[32:]) + rearrange(last_part[:32])
128+
bit_string += b"0"
129+
bit_string += to_little_endian(start_len[32:]) + to_little_endian(start_len[:32])
130+
61131
return bit_string
62132

63133

64-
def get_block(bit_string):
65-
"""[summary]
66-
Iterator:
67-
Returns by each call a list of length 16 with the 32 bit
68-
integer blocks.
134+
def get_block_words(bit_string: bytes) -> Generator[list[int], None, None]:
135+
"""
136+
Splits bit string into blocks of 512 chars and yields each block as a list
137+
of 32-bit words
138+
139+
Example: Suppose the input is the following:
140+
bit_string =
141+
"000000000...0" + # 0x00 (32 bits, padded to the right)
142+
"000000010...0" + # 0x01 (32 bits, padded to the right)
143+
"000000100...0" + # 0x02 (32 bits, padded to the right)
144+
"000000110...0" + # 0x03 (32 bits, padded to the right)
145+
...
146+
"000011110...0" # 0x0f (32 bits, padded to the right)
147+
148+
Then len(bit_string) == 512, so there'll be 1 block. The block is split
149+
into 32-bit words, and each word is converted to little endian. The
150+
first word is interpreted as 0 in decimal, the second word is
151+
interpreted as 1 in decimal, etc.
152+
153+
Thus, block_words == [[0, 1, 2, 3, ..., 15]].
69154
70155
Arguments:
71-
bit_string {[string]} -- [binary string >= 512]
156+
bit_string {[string]} -- [bit string with multiple of 512 as length]
157+
158+
Raises:
159+
ValueError -- [length of bit string isn't multiple of 512]
160+
161+
Yields:
162+
a list of 16 32-bit words
163+
164+
>>> test_string = ("".join(format(n << 24, "032b") for n in range(16))
165+
... .encode("utf-8"))
166+
>>> list(get_block_words(test_string))
167+
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
168+
>>> list(get_block_words(test_string * 4)) == [list(range(16))] * 4
169+
True
170+
>>> list(get_block_words(b"1" * 512)) == [[4294967295] * 16]
171+
True
172+
>>> list(get_block_words(b""))
173+
[]
174+
>>> list(get_block_words(b"1111"))
175+
Traceback (most recent call last):
176+
...
177+
ValueError: Input must have length that's a multiple of 512
72178
"""
179+
if len(bit_string) % 512 != 0:
180+
raise ValueError("Input must have length that's a multiple of 512")
73181

74-
curr_pos = 0
75-
while curr_pos < len(bit_string):
76-
curr_part = bit_string[curr_pos : curr_pos + 512]
77-
my_splits = []
78-
for i in range(16):
79-
my_splits.append(int(rearrange(curr_part[32 * i : 32 * i + 32]), 2))
80-
yield my_splits
81-
curr_pos += 512
182+
for pos in range(0, len(bit_string), 512):
183+
block = bit_string[pos : pos + 512]
184+
block_words = []
185+
for i in range(0, 512, 32):
186+
block_words.append(int(to_little_endian(block[i : i + 32]), 2))
187+
yield block_words
82188

83189

84-
def not32(i):
190+
def not_32(i: int) -> int:
85191
"""
86-
>>> not32(34)
192+
Perform bitwise NOT on given int.
193+
194+
Arguments:
195+
i {[int]} -- [given int]
196+
197+
Raises:
198+
ValueError -- [input is negative]
199+
200+
Returns:
201+
Result of bitwise NOT on i
202+
203+
>>> not_32(34)
87204
4294967261
205+
>>> not_32(1234)
206+
4294966061
207+
>>> not_32(4294966061)
208+
1234
209+
>>> not_32(0)
210+
4294967295
211+
>>> not_32(1)
212+
4294967294
213+
>>> not_32(-1)
214+
Traceback (most recent call last):
215+
...
216+
ValueError: Input must be non-negative
88217
"""
218+
if i < 0:
219+
raise ValueError("Input must be non-negative")
220+
89221
i_str = format(i, "032b")
90222
new_str = ""
91223
for c in i_str:
92224
new_str += "1" if c == "0" else "0"
93225
return int(new_str, 2)
94226

95227

96-
def sum32(a, b):
228+
def sum_32(a: int, b: int) -> int:
229+
"""
230+
Add two numbers as 32-bit ints.
231+
232+
Arguments:
233+
a {[int]} -- [first given int]
234+
b {[int]} -- [second given int]
235+
236+
Returns:
237+
(a + b) as an unsigned 32-bit int
238+
239+
>>> sum_32(1, 1)
240+
2
241+
>>> sum_32(2, 3)
242+
5
243+
>>> sum_32(0, 0)
244+
0
245+
>>> sum_32(-1, -1)
246+
4294967294
247+
>>> sum_32(4294967295, 1)
248+
0
249+
"""
97250
return (a + b) % 2**32
98251

99252

100-
def leftrot32(i, s):
101-
return (i << s) ^ (i >> (32 - s))
253+
def left_rotate_32(i: int, shift: int) -> int:
254+
"""
255+
Rotate the bits of a given int left by a given amount.
256+
257+
Arguments:
258+
i {[int]} -- [given int]
259+
shift {[int]} -- [shift amount]
260+
261+
Raises:
262+
ValueError -- [either given int or shift is negative]
102263
264+
Returns:
265+
`i` rotated to the left by `shift` bits
266+
267+
>>> left_rotate_32(1234, 1)
268+
2468
269+
>>> left_rotate_32(1111, 4)
270+
17776
271+
>>> left_rotate_32(2147483648, 1)
272+
1
273+
>>> left_rotate_32(2147483648, 3)
274+
4
275+
>>> left_rotate_32(4294967295, 4)
276+
4294967295
277+
>>> left_rotate_32(1234, 0)
278+
1234
279+
>>> left_rotate_32(0, 0)
280+
0
281+
>>> left_rotate_32(-1, 0)
282+
Traceback (most recent call last):
283+
...
284+
ValueError: Input must be non-negative
285+
>>> left_rotate_32(0, -1)
286+
Traceback (most recent call last):
287+
...
288+
ValueError: Shift must be non-negative
289+
"""
290+
if i < 0:
291+
raise ValueError("Input must be non-negative")
292+
if shift < 0:
293+
raise ValueError("Shift must be non-negative")
294+
return ((i << shift) ^ (i >> (32 - shift))) % 2**32
295+
296+
297+
def md5_me(message: bytes) -> bytes:
298+
"""
299+
Returns the 32-char MD5 hash of a given message.
103300
104-
def md5me(test_string):
105-
"""[summary]
106-
Returns a 32-bit hash code of the string 'testString'
301+
Reference: https://en.wikipedia.org/wiki/MD5#Algorithm
107302
108303
Arguments:
109-
testString {[string]} -- [message]
304+
message {[string]} -- [message]
305+
306+
Returns:
307+
32-char MD5 hash string
308+
309+
>>> md5_me(b"")
310+
b'd41d8cd98f00b204e9800998ecf8427e'
311+
>>> md5_me(b"The quick brown fox jumps over the lazy dog")
312+
b'9e107d9d372bb6826bd81d3542a419d6'
313+
>>> md5_me(b"The quick brown fox jumps over the lazy dog.")
314+
b'e4d909c290d0fb1ca068ffaddf22cbd0'
315+
316+
>>> import hashlib
317+
>>> from string import ascii_letters
318+
>>> msgs = [b"", ascii_letters.encode("utf-8"), "Üñîçø∂é".encode("utf-8"),
319+
... b"The quick brown fox jumps over the lazy dog."]
320+
>>> all(md5_me(msg) == hashlib.md5(msg).hexdigest().encode("utf-8") for msg in msgs)
321+
True
110322
"""
111323

112-
bs = ""
113-
for i in test_string:
114-
bs += format(ord(i), "08b")
115-
bs = pad(bs)
324+
# Convert to bit string, add padding and append message length
325+
bit_string = preprocess(message)
116326

117-
tvals = [int(2**32 * abs(math.sin(i + 1))) for i in range(64)]
327+
added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)]
118328

329+
# Starting states
119330
a0 = 0x67452301
120331
b0 = 0xEFCDAB89
121332
c0 = 0x98BADCFE
122333
d0 = 0x10325476
123334

124-
s = [
335+
shift_amounts = [
125336
7,
126337
12,
127338
17,
@@ -188,51 +399,46 @@ def md5me(test_string):
188399
21,
189400
]
190401

191-
for m in get_block(bs):
402+
# Process bit string in chunks, each with 16 32-char words
403+
for block_words in get_block_words(bit_string):
192404
a = a0
193405
b = b0
194406
c = c0
195407
d = d0
408+
409+
# Hash current chunk
196410
for i in range(64):
197411
if i <= 15:
198-
# f = (B & C) | (not32(B) & D)
412+
# f = (b & c) | (not_32(b) & d) # Alternate definition for f
199413
f = d ^ (b & (c ^ d))
200414
g = i
201415
elif i <= 31:
202-
# f = (D & B) | (not32(D) & C)
416+
# f = (d & b) | (not_32(d) & c) # Alternate definition for f
203417
f = c ^ (d & (b ^ c))
204418
g = (5 * i + 1) % 16
205419
elif i <= 47:
206420
f = b ^ c ^ d
207421
g = (3 * i + 5) % 16
208422
else:
209-
f = c ^ (b | not32(d))
423+
f = c ^ (b | not_32(d))
210424
g = (7 * i) % 16
211-
dtemp = d
425+
f = (f + a + added_consts[i] + block_words[g]) % 2**32
426+
a = d
212427
d = c
213428
c = b
214-
b = sum32(b, leftrot32((a + f + tvals[i] + m[g]) % 2**32, s[i]))
215-
a = dtemp
216-
a0 = sum32(a0, a)
217-
b0 = sum32(b0, b)
218-
c0 = sum32(c0, c)
219-
d0 = sum32(d0, d)
429+
b = sum_32(b, left_rotate_32(f, shift_amounts[i]))
430+
431+
# Add hashed chunk to running total
432+
a0 = sum_32(a0, a)
433+
b0 = sum_32(b0, b)
434+
c0 = sum_32(c0, c)
435+
d0 = sum_32(d0, d)
220436

221437
digest = reformat_hex(a0) + reformat_hex(b0) + reformat_hex(c0) + reformat_hex(d0)
222438
return digest
223439

224440

225-
def test():
226-
assert md5me("") == "d41d8cd98f00b204e9800998ecf8427e"
227-
assert (
228-
md5me("The quick brown fox jumps over the lazy dog")
229-
== "9e107d9d372bb6826bd81d3542a419d6"
230-
)
231-
print("Success.")
232-
233-
234441
if __name__ == "__main__":
235-
test()
236442
import doctest
237443

238444
doctest.testmod()

0 commit comments

Comments
 (0)
Please sign in to comment.