From 7fcce6e3b1eceea366f12f0b4760ea256beed61c Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 13:33:17 -0800 Subject: [PATCH 01/30] Add type hints to md5.py --- hashes/md5.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 2020bf2e53bf..e36aff96129d 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -1,7 +1,8 @@ import math +from collections.abc import Generator -def rearrange(bit_string_32): +def rearrange(bit_string_32: str) -> str: """[summary] Regroups the given binary string. @@ -25,7 +26,7 @@ def rearrange(bit_string_32): return new_string -def reformat_hex(i): +def reformat_hex(i: int) -> str: """[summary] Converts the given integer into 8-digit hex number. @@ -42,12 +43,12 @@ def reformat_hex(i): return thing -def pad(bit_string): +def pad(bit_string: str) -> str: """[summary] Fills up the binary string to a 512 bit binary string Arguments: - bitString {[string]} -- [binary string] + bit_string {[string]} -- [binary string] Returns: [string] -- [binary string] @@ -61,10 +62,10 @@ def pad(bit_string): return bit_string -def get_block(bit_string): +def get_block(bit_string: str) -> Generator[list[int], None, None]: """[summary] Iterator: - Returns by each call a list of length 16 with the 32 bit + Returns by each call a list of length 16 with the 32-bit integer blocks. Arguments: @@ -81,7 +82,7 @@ def get_block(bit_string): curr_pos += 512 -def not32(i): +def not32(i: int) -> int: """ >>> not32(34) 4294967261 @@ -93,25 +94,25 @@ def not32(i): return int(new_str, 2) -def sum32(a, b): +def sum32(a: int, b: int) -> int: return (a + b) % 2**32 -def leftrot32(i, s): +def leftrot32(i: int, s: int) -> int: return (i << s) ^ (i >> (32 - s)) -def md5me(test_string): +def md5me(test_string: str) -> str: """[summary] - Returns a 32-bit hash code of the string 'testString' + Returns a 32-bit hash code of the string 'test_string' Arguments: - testString {[string]} -- [message] + test_string {[string]} -- [message] """ bs = "" - for i in test_string: - bs += format(ord(i), "08b") + for char in test_string: + bs += format(ord(char), "08b") bs = pad(bs) tvals = [int(2**32 * abs(math.sin(i + 1))) for i in range(64)] @@ -222,7 +223,7 @@ def md5me(test_string): return digest -def test(): +def test() -> None: assert md5me("") == "d41d8cd98f00b204e9800998ecf8427e" assert ( md5me("The quick brown fox jumps over the lazy dog") From a20ccf88cfc9665c320e131ad4eb5f506da68a6e Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 13:50:44 -0800 Subject: [PATCH 02/30] Rename some vars to snake case --- hashes/md5.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index e36aff96129d..f0a48f43ec11 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -36,10 +36,10 @@ def reformat_hex(i: int) -> str: '9a020000' """ - hexrep = format(i, "08x") + hex_rep = format(i, "08x") thing = "" for i in [3, 2, 1, 0]: - thing += hexrep[2 * i : 2 * i + 2] + thing += hex_rep[2 * i : 2 * i + 2] return thing @@ -82,9 +82,9 @@ def get_block(bit_string: str) -> Generator[list[int], None, None]: curr_pos += 512 -def not32(i: int) -> int: +def not_32(i: int) -> int: """ - >>> not32(34) + >>> not_32(34) 4294967261 """ i_str = format(i, "032b") @@ -94,15 +94,15 @@ def not32(i: int) -> int: return int(new_str, 2) -def sum32(a: int, b: int) -> int: +def sum_32(a: int, b: int) -> int: return (a + b) % 2**32 -def leftrot32(i: int, s: int) -> int: +def left_rotate_32(i: int, s: int) -> int: return (i << s) ^ (i >> (32 - s)) -def md5me(test_string: str) -> str: +def md5_me(test_string: str) -> str: """[summary] Returns a 32-bit hash code of the string 'test_string' @@ -115,7 +115,7 @@ def md5me(test_string: str) -> str: bs += format(ord(char), "08b") bs = pad(bs) - tvals = [int(2**32 * abs(math.sin(i + 1))) for i in range(64)] + t_vals = [int(2**32 * abs(math.sin(i + 1))) for i in range(64)] a0 = 0x67452301 b0 = 0xEFCDAB89 @@ -196,37 +196,37 @@ def md5me(test_string: str) -> str: d = d0 for i in range(64): if i <= 15: - # f = (B & C) | (not32(B) & D) + # f = (B & C) | (not_32(B) & D) f = d ^ (b & (c ^ d)) g = i elif i <= 31: - # f = (D & B) | (not32(D) & C) + # f = (D & B) | (not_32(D) & C) f = c ^ (d & (b ^ c)) g = (5 * i + 1) % 16 elif i <= 47: f = b ^ c ^ d g = (3 * i + 5) % 16 else: - f = c ^ (b | not32(d)) + f = c ^ (b | not_32(d)) g = (7 * i) % 16 - dtemp = d + d_temp = d d = c c = b - b = sum32(b, leftrot32((a + f + tvals[i] + m[g]) % 2**32, s[i])) - a = dtemp - a0 = sum32(a0, a) - b0 = sum32(b0, b) - c0 = sum32(c0, c) - d0 = sum32(d0, d) + b = sum_32(b, left_rotate_32((a + f + t_vals[i] + m[g]) % 2**32, s[i])) + a = d_temp + a0 = sum_32(a0, a) + b0 = sum_32(b0, b) + c0 = sum_32(c0, c) + d0 = sum_32(d0, d) digest = reformat_hex(a0) + reformat_hex(b0) + reformat_hex(c0) + reformat_hex(d0) return digest def test() -> None: - assert md5me("") == "d41d8cd98f00b204e9800998ecf8427e" + assert md5_me("") == "d41d8cd98f00b204e9800998ecf8427e" assert ( - md5me("The quick brown fox jumps over the lazy dog") + md5_me("The quick brown fox jumps over the lazy dog") == "9e107d9d372bb6826bd81d3542a419d6" ) print("Success.") From ce2d51faf2aa35ae916fe7b556f5975573b6342c Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 13:52:07 -0800 Subject: [PATCH 03/30] Specify functions imported from math --- hashes/md5.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index f0a48f43ec11..781d9e6b6f5b 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -1,5 +1,5 @@ -import math from collections.abc import Generator +from math import sin def rearrange(bit_string_32: str) -> str: @@ -115,7 +115,7 @@ def md5_me(test_string: str) -> str: bs += format(ord(char), "08b") bs = pad(bs) - t_vals = [int(2**32 * abs(math.sin(i + 1))) for i in range(64)] + t_vals = [int(2**32 * abs(sin(i + 1))) for i in range(64)] a0 = 0x67452301 b0 = 0xEFCDAB89 From d243bf3375718731048fafcbe71b36030be087e3 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 19:11:19 -0800 Subject: [PATCH 04/30] Rename vars and functions to be more descriptive --- hashes/md5.py | 66 +++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 781d9e6b6f5b..36b069b33be4 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -2,19 +2,19 @@ from math import sin -def rearrange(bit_string_32: str) -> str: +def to_little_endian(bit_string_32: str) -> str: """[summary] - Regroups the given binary string. + Converts the given binary string to little-endian in groups of 8 bits. Arguments: - bitString32 {[string]} -- [32 bit binary] + bit_string_32 {[string]} -- [32 bit binary] Raises: - ValueError -- [if the given string not are 32 bit binary string] + ValueError -- [if the given string not are 32 bit binary string] Returns: [string] -- [32 bit binary string] - >>> rearrange('1234567890abcdfghijklmnopqrstuvw') + >>> to_little_endian('1234567890abcdfghijklmnopqrstuvw') 'pqrstuvwhijklmno90abcdfg12345678' """ @@ -28,7 +28,7 @@ def rearrange(bit_string_32: str) -> str: def reformat_hex(i: int) -> str: """[summary] - Converts the given integer into 8-digit hex number. + Converts the given integer into 8-char hex number. Arguments: i {[int]} -- [integer] @@ -43,7 +43,7 @@ def reformat_hex(i: int) -> str: return thing -def pad(bit_string: str) -> str: +def preprocess(bit_string: str) -> str: """[summary] Fills up the binary string to a 512 bit binary string @@ -58,11 +58,11 @@ def pad(bit_string: str) -> str: while len(bit_string) % 512 != 448: bit_string += "0" last_part = format(start_length, "064b") - bit_string += rearrange(last_part[32:]) + rearrange(last_part[:32]) + bit_string += to_little_endian(last_part[32:]) + to_little_endian(last_part[:32]) return bit_string -def get_block(bit_string: str) -> Generator[list[int], None, None]: +def get_block_words(bit_string: str) -> Generator[list[int], None, None]: """[summary] Iterator: Returns by each call a list of length 16 with the 32-bit @@ -72,14 +72,14 @@ def get_block(bit_string: str) -> Generator[list[int], None, None]: bit_string {[string]} -- [binary string >= 512] """ - curr_pos = 0 - while curr_pos < len(bit_string): - curr_part = bit_string[curr_pos : curr_pos + 512] - my_splits = [] + pos = 0 + while pos < len(bit_string): + block = bit_string[pos : pos + 512] + block_words = [] for i in range(16): - my_splits.append(int(rearrange(curr_part[32 * i : 32 * i + 32]), 2)) - yield my_splits - curr_pos += 512 + block_words.append(int(to_little_endian(block[32 * i : 32 * i + 32]), 2)) + yield block_words + pos += 512 def not_32(i: int) -> int: @@ -98,31 +98,31 @@ def sum_32(a: int, b: int) -> int: return (a + b) % 2**32 -def left_rotate_32(i: int, s: int) -> int: - return (i << s) ^ (i >> (32 - s)) +def left_rotate_32(i: int, shift: int) -> int: + return (i << shift) ^ (i >> (32 - shift)) -def md5_me(test_string: str) -> str: +def md5_me(message: str) -> str: """[summary] - Returns a 32-bit hash code of the string 'test_string' + Returns a 32-bit hash of the string 'message' Arguments: - test_string {[string]} -- [message] + message {[string]} -- [message] """ - bs = "" - for char in test_string: - bs += format(ord(char), "08b") - bs = pad(bs) + bit_string = "" + for char in message: + bit_string += format(ord(char), "08b") + bit_string = preprocess(bit_string) - t_vals = [int(2**32 * abs(sin(i + 1))) for i in range(64)] + added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)] a0 = 0x67452301 b0 = 0xEFCDAB89 c0 = 0x98BADCFE d0 = 0x10325476 - s = [ + shift_amounts = [ 7, 12, 17, @@ -189,18 +189,18 @@ def md5_me(test_string: str) -> str: 21, ] - for m in get_block(bs): + for block_words in get_block_words(bit_string): a = a0 b = b0 c = c0 d = d0 for i in range(64): if i <= 15: - # f = (B & C) | (not_32(B) & D) + # f = (b & c) | (not_32(b) & d) f = d ^ (b & (c ^ d)) g = i elif i <= 31: - # f = (D & B) | (not_32(D) & C) + # f = (d & b) | (not_32(d) & c) f = c ^ (d & (b ^ c)) g = (5 * i + 1) % 16 elif i <= 47: @@ -209,11 +209,11 @@ def md5_me(test_string: str) -> str: else: f = c ^ (b | not_32(d)) g = (7 * i) % 16 - d_temp = d + f = (f + a + added_consts[i] + block_words[g]) % 2**32 + a = d d = c c = b - b = sum_32(b, left_rotate_32((a + f + t_vals[i] + m[g]) % 2**32, s[i])) - a = d_temp + b = sum_32(b, left_rotate_32(f, shift_amounts[i])) a0 = sum_32(a0, a) b0 = sum_32(b0, b) c0 = sum_32(c0, c) From b29ab6644fff660f3d5bc11892d1581ebeac0f4a Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 19:16:38 -0800 Subject: [PATCH 05/30] Make tests from test function into doctests --- hashes/md5.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 36b069b33be4..f5018cad8492 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -108,6 +108,13 @@ def md5_me(message: str) -> str: Arguments: message {[string]} -- [message] + + >>> md5_me("") + 'd41d8cd98f00b204e9800998ecf8427e' + >>> md5_me("The quick brown fox jumps over the lazy dog") + '9e107d9d372bb6826bd81d3542a419d6' + >>> md5_me("The quick brown fox jumps over the lazy dog.") + 'e4d909c290d0fb1ca068ffaddf22cbd0' """ bit_string = "" @@ -223,17 +230,7 @@ def md5_me(message: str) -> str: return digest -def test() -> None: - assert md5_me("") == "d41d8cd98f00b204e9800998ecf8427e" - assert ( - md5_me("The quick brown fox jumps over the lazy dog") - == "9e107d9d372bb6826bd81d3542a419d6" - ) - print("Success.") - - if __name__ == "__main__": - test() import doctest doctest.testmod() From 0b5e6a4fc5ced4297b33fa0f59e35980f7590fe8 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 19:26:47 -0800 Subject: [PATCH 06/30] Clarify more var names --- hashes/md5.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index f5018cad8492..4b20bfadf367 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -4,7 +4,7 @@ def to_little_endian(bit_string_32: str) -> str: """[summary] - Converts the given binary string to little-endian in groups of 8 bits. + Converts the given binary string to little-endian in groups of 8 chars. Arguments: bit_string_32 {[string]} -- [32 bit binary] @@ -20,27 +20,27 @@ def to_little_endian(bit_string_32: str) -> str: if len(bit_string_32) != 32: raise ValueError("Need length 32") - new_string = "" + little_endian = "" for i in [3, 2, 1, 0]: - new_string += bit_string_32[8 * i : 8 * i + 8] - return new_string + little_endian += bit_string_32[8 * i : 8 * i + 8] + return little_endian def reformat_hex(i: int) -> str: """[summary] - Converts the given integer into 8-char hex number. + Converts the given integer into 8-char little-endian hex number. Arguments: - i {[int]} -- [integer] + i {[int]} -- [integer] >>> reformat_hex(666) '9a020000' """ hex_rep = format(i, "08x") - thing = "" + little_endian_hex = "" for i in [3, 2, 1, 0]: - thing += hex_rep[2 * i : 2 * i + 2] - return thing + little_endian_hex += hex_rep[2 * i : 2 * i + 2] + return little_endian_hex def preprocess(bit_string: str) -> str: @@ -65,11 +65,11 @@ def preprocess(bit_string: str) -> str: def get_block_words(bit_string: str) -> Generator[list[int], None, None]: """[summary] Iterator: - Returns by each call a list of length 16 with the 32-bit - integer blocks. + Returns by each call a list of length 16 with the 32-bit + integer blocks. Arguments: - bit_string {[string]} -- [binary string >= 512] + bit_string {[string]} -- [binary string >= 512] """ pos = 0 @@ -107,7 +107,7 @@ def md5_me(message: str) -> str: Returns a 32-bit hash of the string 'message' Arguments: - message {[string]} -- [message] + message {[string]} -- [message] >>> md5_me("") 'd41d8cd98f00b204e9800998ecf8427e' From 18d891caa7d95f18199e1dcdc689e6de07774ef5 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 19:36:52 -0800 Subject: [PATCH 07/30] Refactor some MD5 code into preprocess function --- hashes/md5.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 4b20bfadf367..2bf98772a7b9 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -43,22 +43,29 @@ def reformat_hex(i: int) -> str: return little_endian_hex -def preprocess(bit_string: str) -> str: +def preprocess(message: str) -> str: """[summary] - Fills up the binary string to a 512 bit binary string + Preprocesses the message string: + - Convert message to bit string + - Pad bit string to a multiple of 512 bits: + - Append a 1 + - Append 0's until length = 448 (mod 512), 64 bits short of a multiple of 512 + - Append length of original message (64 bits) Arguments: - bit_string {[string]} -- [binary string] + message {[string]} -- [message string] Returns: - [string] -- [binary string] + [string] -- [padded bit string] """ - start_length = len(bit_string) + bit_string = "" + for char in message: + bit_string += format(ord(char), "08b") + start_len = format(len(bit_string), "064b") bit_string += "1" while len(bit_string) % 512 != 448: bit_string += "0" - last_part = format(start_length, "064b") - bit_string += to_little_endian(last_part[32:]) + to_little_endian(last_part[:32]) + bit_string += to_little_endian(start_len[32:]) + to_little_endian(start_len[:32]) return bit_string @@ -117,10 +124,7 @@ def md5_me(message: str) -> str: 'e4d909c290d0fb1ca068ffaddf22cbd0' """ - bit_string = "" - for char in message: - bit_string += format(ord(char), "08b") - bit_string = preprocess(bit_string) + bit_string = preprocess(message) added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)] From 27a8b29a0d0f07f40bc6494bb10c9cde99fa99bf Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 19:48:50 -0800 Subject: [PATCH 08/30] Simplify loop indices in get_block_words --- hashes/md5.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 2bf98772a7b9..0aca6a5e34fc 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -79,14 +79,12 @@ def get_block_words(bit_string: str) -> Generator[list[int], None, None]: bit_string {[string]} -- [binary string >= 512] """ - pos = 0 - while pos < len(bit_string): + for pos in range(0, len(bit_string), 512): block = bit_string[pos : pos + 512] block_words = [] - for i in range(16): - block_words.append(int(to_little_endian(block[32 * i : 32 * i + 32]), 2)) + for i in range(0, 512, 32): + block_words.append(int(to_little_endian(block[i : i + 32]), 2)) yield block_words - pos += 512 def not_32(i: int) -> int: From 885f11626b8044233ce512f63f720b06a6fc2b59 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sun, 1 Jan 2023 00:25:40 -0800 Subject: [PATCH 09/30] Add more detailed comments, docs, and doctests --- hashes/md5.py | 253 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 225 insertions(+), 28 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 0aca6a5e34fc..cc83c5758c30 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -1,42 +1,86 @@ +""" +The MD5 algorithm is a hash function that's commonly used as a checksum to +detect data corruption. The algorithm works by processing a given message in +blocks of 512 bits, padding the message as needed. It uses the blocks to operate +a 128-bit state and performs a total of 64 such operations. Note that all values +are little-endian, so inputs are converted as needed. + +Although MD5 was used as a cryptographic hash function in the past, it's since +been cracked, so it shouldn't be used for security purposes. + +For more info, see https://en.wikipedia.org/wiki/MD5 +""" + from collections.abc import Generator from math import sin -def to_little_endian(bit_string_32: str) -> str: - """[summary] - Converts the given binary string to little-endian in groups of 8 chars. +def to_little_endian(string_32: str) -> str: + """ + Converts the given string to little-endian in groups of 8 chars. Arguments: - bit_string_32 {[string]} -- [32 bit binary] + string_32 {[string]} -- [32-char string] Raises: - ValueError -- [if the given string not are 32 bit binary string] + ValueError -- [input is not 32 char] Returns: - [string] -- [32 bit binary string] + 32-char little-endian string >>> to_little_endian('1234567890abcdfghijklmnopqrstuvw') 'pqrstuvwhijklmno90abcdfg12345678' + >>> to_little_endian('1234567890') + Traceback (most recent call last): + ... + ValueError: Input must be of length 32 """ + if len(string_32) != 32: + raise ValueError("Input must be of length 32") - if len(bit_string_32) != 32: - raise ValueError("Need length 32") little_endian = "" for i in [3, 2, 1, 0]: - little_endian += bit_string_32[8 * i : 8 * i + 8] + little_endian += string_32[8 * i : 8 * i + 8] return little_endian def reformat_hex(i: int) -> str: - """[summary] - Converts the given integer into 8-char little-endian hex number. + """ + Converts the given non-negative integer to hex string. + + Example: Suppose the input is the following: + i = 1234 + + The input is 0x000004d2 in hex, so the little-endian hex string is + "d2040000". Arguments: i {[int]} -- [integer] + + Raises: + ValueError -- [input is negative] + + Returns: + 8-char little-endian hex string + + >>> reformat_hex(1234) + 'd2040000' >>> reformat_hex(666) '9a020000' + >>> reformat_hex(0) + '00000000' + >>> reformat_hex(1234567890) + 'd2029649' + >>> reformat_hex(1234567890987654321) + 'b11c6cb1' + >>> reformat_hex(-1) + Traceback (most recent call last): + ... + ValueError: Input must be non-negative """ + if i < 0: + raise ValueError("Input must be non-negative") - hex_rep = format(i, "08x") + hex_rep = format(i, "08x")[-8:] little_endian_hex = "" for i in [3, 2, 1, 0]: little_endian_hex += hex_rep[2 * i : 2 * i + 2] @@ -44,40 +88,94 @@ def reformat_hex(i: int) -> str: def preprocess(message: str) -> str: - """[summary] + """ Preprocesses the message string: - Convert message to bit string - - Pad bit string to a multiple of 512 bits: + - Pad bit string to a multiple of 512 chars: - Append a 1 - - Append 0's until length = 448 (mod 512), 64 bits short of a multiple of 512 - - Append length of original message (64 bits) + - Append 0's until length = 448 (mod 512) + - Append length of original message (64 chars) + + Example: Suppose the input is the following: + message = "a" + + The message bit string is "01100001", which is 8 bits long. Thus, the + bit string needs 439 bits of padding so that + (bit_string + "1" + padding) = 448 (mod 512). + The message length is "000010000...0" in 64-bit little-endian binary. + The combined bit string is then 512 bits long. Arguments: - message {[string]} -- [message string] + message {[string]} -- [message string] Returns: - [string] -- [padded bit string] + processed bit string padded to a multiple of 512 chars + + >>> preprocess("a") == "01100001" + "1" + ("0" * 439) + "00001000" + ("0" * 56) + True + >>> preprocess("") == "1" + ("0" * 447) + ("0" * 64) + True """ bit_string = "" for char in message: bit_string += format(ord(char), "08b") start_len = format(len(bit_string), "064b") + + # Pad bit_string to a multiple of 512 chars bit_string += "1" while len(bit_string) % 512 != 448: bit_string += "0" bit_string += to_little_endian(start_len[32:]) + to_little_endian(start_len[:32]) + return bit_string def get_block_words(bit_string: str) -> Generator[list[int], None, None]: - """[summary] - Iterator: - Returns by each call a list of length 16 with the 32-bit - integer blocks. + """ + Splits bit string into blocks of 512 chars and yields each block as a list + of 32-bit words + + Example: Suppose the input is the following: + bit_string = + "000000000...0" + # 0x00 (32 bits, padded to the right) + "000000010...0" + # 0x01 (32 bits, padded to the right) + "000000100...0" + # 0x02 (32 bits, padded to the right) + "000000110...0" + # 0x03 (32 bits, padded to the right) + ... + "000011110...0" # 0x0a (32 bits, padded to the right) + + Then len(bit_string) == 512, so there'll be 1 block. The block is split + into 32-bit words, and each word is converted to little endian. The + first word is interpreted as 0 in decimal, the second word is + interpreted as 1 in decimal, etc. + + Thus, block_words == [[0, 1, 2, 3, ..., 15]]. Arguments: - bit_string {[string]} -- [binary string >= 512] + bit_string {[string]} -- [bit string with multiple of 512 as length] + + Raises: + ValueError -- [length of bit string isn't multiple of 512] + + Yields: + a list of 16 32-bit words + + >>> test_string = "".join(format(n << 24, "032b") for n in range(16)) + >>> list(get_block_words(test_string)) + [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]] + >>> list(get_block_words(test_string * 4)) == [list(range(16))] * 4 + True + >>> list(get_block_words("1" * 512)) == [[4294967295] * 16] + True + >>> list(get_block_words("")) + [] + >>> list(get_block_words("1111")) + Traceback (most recent call last): + ... + ValueError: Input must have length that's a multiple of 512 """ + if len(bit_string) % 512 != 0: + raise ValueError("Input must have length that's a multiple of 512") for pos in range(0, len(bit_string), 512): block = bit_string[pos : pos + 512] @@ -89,9 +187,35 @@ def get_block_words(bit_string: str) -> Generator[list[int], None, None]: def not_32(i: int) -> int: """ + Perform bitwise NOT on given int. + + Arguments: + i {[int]} -- [given int] + + Raises: + ValueError -- [input is negative] + + Returns: + Result of bitwise NOT on i + >>> not_32(34) 4294967261 + >>> not_32(1234) + 4294966061 + >>> not_32(4294966061) + 1234 + >>> not_32(0) + 4294967295 + >>> not_32(1) + 4294967294 + >>> not_32(-1) + Traceback (most recent call last): + ... + ValueError: Input must be non-negative """ + if i < 0: + raise ValueError("Input must be non-negative") + i_str = format(i, "032b") new_str = "" for c in i_str: @@ -100,20 +224,86 @@ def not_32(i: int) -> int: def sum_32(a: int, b: int) -> int: + """ + Add two numbers as 32-bit ints. + + Arguments: + a {[int]} -- [first given int] + b {[int]} -- [second given int] + + Returns: + (a + b) as an unsigned 32-bit int + + >>> sum_32(1, 1) + 2 + >>> sum_32(2, 3) + 5 + >>> sum_32(0, 0) + 0 + >>> sum_32(-1, -1) + 4294967294 + >>> sum_32(4294967295, 1) + 0 + """ return (a + b) % 2**32 def left_rotate_32(i: int, shift: int) -> int: - return (i << shift) ^ (i >> (32 - shift)) + """ + Rotate the bits of a given int left by a given amount. + + Arguments: + i {[int]} -- [given int] + shift {[int]} -- [shift amount] + + Raises: + ValueError -- [either given int or shift is negative] + + Returns: + `i` rotated to the left by `shift` bits + + >>> left_rotate_32(1234, 1) + 2468 + >>> left_rotate_32(1111, 4) + 17776 + >>> left_rotate_32(2147483648, 1) + 1 + >>> left_rotate_32(2147483648, 3) + 4 + >>> left_rotate_32(4294967295, 4) + 4294967295 + >>> left_rotate_32(1234, 0) + 1234 + >>> left_rotate_32(0, 0) + 0 + >>> left_rotate_32(-1, 0) + Traceback (most recent call last): + ... + ValueError: Input must be non-negative + >>> left_rotate_32(0, -1) + Traceback (most recent call last): + ... + ValueError: Shift must be non-negative + """ + if i < 0: + raise ValueError("Input must be non-negative") + if shift < 0: + raise ValueError("Shift must be non-negative") + return ((i << shift) ^ (i >> (32 - shift))) % 2**32 def md5_me(message: str) -> str: - """[summary] - Returns a 32-bit hash of the string 'message' + """ + Returns the 32-char MD5 hash of a given message. + + Reference: https://en.wikipedia.org/wiki/MD5#Algorithm Arguments: message {[string]} -- [message] + Returns: + 32-char MD5 hash string + >>> md5_me("") 'd41d8cd98f00b204e9800998ecf8427e' >>> md5_me("The quick brown fox jumps over the lazy dog") @@ -122,10 +312,12 @@ def md5_me(message: str) -> str: 'e4d909c290d0fb1ca068ffaddf22cbd0' """ + # Convert to bit string, add padding and append message length bit_string = preprocess(message) added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)] + # Starting states a0 = 0x67452301 b0 = 0xEFCDAB89 c0 = 0x98BADCFE @@ -198,18 +390,21 @@ def md5_me(message: str) -> str: 21, ] + # Process bit string in chunks, each with 16 32-char words for block_words in get_block_words(bit_string): a = a0 b = b0 c = c0 d = d0 + + # Hash current chunk for i in range(64): if i <= 15: - # f = (b & c) | (not_32(b) & d) + # f = (b & c) | (not_32(b) & d) # Alternate definition for f f = d ^ (b & (c ^ d)) g = i elif i <= 31: - # f = (d & b) | (not_32(d) & c) + # f = (d & b) | (not_32(d) & c) # Alternate definition for f f = c ^ (d & (b ^ c)) g = (5 * i + 1) % 16 elif i <= 47: @@ -223,6 +418,8 @@ def md5_me(message: str) -> str: d = c c = b b = sum_32(b, left_rotate_32(f, shift_amounts[i])) + + # Add hashed chunk to running total a0 = sum_32(a0, a) b0 = sum_32(b0, b) c0 = sum_32(c0, c) From f7ba9da92ae49f8c191877c1c17318c24c74600c Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Mon, 2 Jan 2023 01:57:11 +0000 Subject: [PATCH 10/30] updating DIRECTORY.md --- DIRECTORY.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DIRECTORY.md b/DIRECTORY.md index 3437df12cbf5..5ce9dca74c06 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -123,6 +123,7 @@ * [Huffman](compression/huffman.py) * [Lempel Ziv](compression/lempel_ziv.py) * [Lempel Ziv Decompress](compression/lempel_ziv_decompress.py) + * [Lz77](compression/lz77.py) * [Peak Signal To Noise Ratio](compression/peak_signal_to_noise_ratio.py) * [Run Length Encoding](compression/run_length_encoding.py) @@ -1162,7 +1163,7 @@ * [Get Amazon Product Data](web_programming/get_amazon_product_data.py) * [Get Imdb Top 250 Movies Csv](web_programming/get_imdb_top_250_movies_csv.py) * [Get Imdbtop](web_programming/get_imdbtop.py) - * [Get Top Billioners](web_programming/get_top_billioners.py) + * [Get Top Billionaires](web_programming/get_top_billionaires.py) * [Get Top Hn Posts](web_programming/get_top_hn_posts.py) * [Get User Tweets](web_programming/get_user_tweets.py) * [Giphy](web_programming/giphy.py) From d289ade01b12c59e254fe1671352d8a74b2529c5 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Mon, 2 Jan 2023 01:57:13 +0000 Subject: [PATCH 11/30] updating DIRECTORY.md --- DIRECTORY.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DIRECTORY.md b/DIRECTORY.md index 3437df12cbf5..5ce9dca74c06 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -123,6 +123,7 @@ * [Huffman](compression/huffman.py) * [Lempel Ziv](compression/lempel_ziv.py) * [Lempel Ziv Decompress](compression/lempel_ziv_decompress.py) + * [Lz77](compression/lz77.py) * [Peak Signal To Noise Ratio](compression/peak_signal_to_noise_ratio.py) * [Run Length Encoding](compression/run_length_encoding.py) @@ -1162,7 +1163,7 @@ * [Get Amazon Product Data](web_programming/get_amazon_product_data.py) * [Get Imdb Top 250 Movies Csv](web_programming/get_imdb_top_250_movies_csv.py) * [Get Imdbtop](web_programming/get_imdbtop.py) - * [Get Top Billioners](web_programming/get_top_billioners.py) + * [Get Top Billionaires](web_programming/get_top_billionaires.py) * [Get Top Hn Posts](web_programming/get_top_hn_posts.py) * [Get User Tweets](web_programming/get_user_tweets.py) * [Giphy](web_programming/giphy.py) From 2aea9a223b2fb03bfdb4205cbcb2e0859c17d4cd Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Thu, 12 Jan 2023 15:24:15 +0000 Subject: [PATCH 12/30] updating DIRECTORY.md --- DIRECTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index 5ce9dca74c06..31e86ea59b79 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -557,6 +557,7 @@ * [Gamma Recursive](maths/gamma_recursive.py) * [Gaussian](maths/gaussian.py) * [Gaussian Error Linear Unit](maths/gaussian_error_linear_unit.py) + * [Gcd Of N Numbers](maths/gcd_of_n_numbers.py) * [Greatest Common Divisor](maths/greatest_common_divisor.py) * [Greedy Coin Change](maths/greedy_coin_change.py) * [Hamming Numbers](maths/hamming_numbers.py) From 9d1971b11f736898b1ff2112aa0de470977224c0 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Thu, 12 Jan 2023 15:25:40 +0000 Subject: [PATCH 13/30] updating DIRECTORY.md --- DIRECTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index 5ce9dca74c06..31e86ea59b79 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -557,6 +557,7 @@ * [Gamma Recursive](maths/gamma_recursive.py) * [Gaussian](maths/gaussian.py) * [Gaussian Error Linear Unit](maths/gaussian_error_linear_unit.py) + * [Gcd Of N Numbers](maths/gcd_of_n_numbers.py) * [Greatest Common Divisor](maths/greatest_common_divisor.py) * [Greedy Coin Change](maths/greedy_coin_change.py) * [Hamming Numbers](maths/hamming_numbers.py) From 5f404b482ffe5f6ac26e87ea18ed74be042be7c0 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Thu, 26 Jan 2023 07:26:59 +0000 Subject: [PATCH 14/30] updating DIRECTORY.md --- DIRECTORY.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DIRECTORY.md b/DIRECTORY.md index 31e86ea59b79..a8786cc2591f 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -543,8 +543,7 @@ * [Euler Modified](maths/euler_modified.py) * [Eulers Totient](maths/eulers_totient.py) * [Extended Euclidean Algorithm](maths/extended_euclidean_algorithm.py) - * [Factorial Iterative](maths/factorial_iterative.py) - * [Factorial Recursive](maths/factorial_recursive.py) + * [Factorial](maths/factorial.py) * [Factors](maths/factors.py) * [Fermat Little Theorem](maths/fermat_little_theorem.py) * [Fibonacci](maths/fibonacci.py) From 3fbe643ab39b42bb8fba8676f933f9b38cfdf599 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 13:33:17 -0800 Subject: [PATCH 15/30] Add type hints to md5.py --- hashes/md5.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 2020bf2e53bf..e36aff96129d 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -1,7 +1,8 @@ import math +from collections.abc import Generator -def rearrange(bit_string_32): +def rearrange(bit_string_32: str) -> str: """[summary] Regroups the given binary string. @@ -25,7 +26,7 @@ def rearrange(bit_string_32): return new_string -def reformat_hex(i): +def reformat_hex(i: int) -> str: """[summary] Converts the given integer into 8-digit hex number. @@ -42,12 +43,12 @@ def reformat_hex(i): return thing -def pad(bit_string): +def pad(bit_string: str) -> str: """[summary] Fills up the binary string to a 512 bit binary string Arguments: - bitString {[string]} -- [binary string] + bit_string {[string]} -- [binary string] Returns: [string] -- [binary string] @@ -61,10 +62,10 @@ def pad(bit_string): return bit_string -def get_block(bit_string): +def get_block(bit_string: str) -> Generator[list[int], None, None]: """[summary] Iterator: - Returns by each call a list of length 16 with the 32 bit + Returns by each call a list of length 16 with the 32-bit integer blocks. Arguments: @@ -81,7 +82,7 @@ def get_block(bit_string): curr_pos += 512 -def not32(i): +def not32(i: int) -> int: """ >>> not32(34) 4294967261 @@ -93,25 +94,25 @@ def not32(i): return int(new_str, 2) -def sum32(a, b): +def sum32(a: int, b: int) -> int: return (a + b) % 2**32 -def leftrot32(i, s): +def leftrot32(i: int, s: int) -> int: return (i << s) ^ (i >> (32 - s)) -def md5me(test_string): +def md5me(test_string: str) -> str: """[summary] - Returns a 32-bit hash code of the string 'testString' + Returns a 32-bit hash code of the string 'test_string' Arguments: - testString {[string]} -- [message] + test_string {[string]} -- [message] """ bs = "" - for i in test_string: - bs += format(ord(i), "08b") + for char in test_string: + bs += format(ord(char), "08b") bs = pad(bs) tvals = [int(2**32 * abs(math.sin(i + 1))) for i in range(64)] @@ -222,7 +223,7 @@ def md5me(test_string): return digest -def test(): +def test() -> None: assert md5me("") == "d41d8cd98f00b204e9800998ecf8427e" assert ( md5me("The quick brown fox jumps over the lazy dog") From adfe215395ba139624f90be22a3c8858a9fe3151 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 13:50:44 -0800 Subject: [PATCH 16/30] Rename some vars to snake case --- hashes/md5.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index e36aff96129d..f0a48f43ec11 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -36,10 +36,10 @@ def reformat_hex(i: int) -> str: '9a020000' """ - hexrep = format(i, "08x") + hex_rep = format(i, "08x") thing = "" for i in [3, 2, 1, 0]: - thing += hexrep[2 * i : 2 * i + 2] + thing += hex_rep[2 * i : 2 * i + 2] return thing @@ -82,9 +82,9 @@ def get_block(bit_string: str) -> Generator[list[int], None, None]: curr_pos += 512 -def not32(i: int) -> int: +def not_32(i: int) -> int: """ - >>> not32(34) + >>> not_32(34) 4294967261 """ i_str = format(i, "032b") @@ -94,15 +94,15 @@ def not32(i: int) -> int: return int(new_str, 2) -def sum32(a: int, b: int) -> int: +def sum_32(a: int, b: int) -> int: return (a + b) % 2**32 -def leftrot32(i: int, s: int) -> int: +def left_rotate_32(i: int, s: int) -> int: return (i << s) ^ (i >> (32 - s)) -def md5me(test_string: str) -> str: +def md5_me(test_string: str) -> str: """[summary] Returns a 32-bit hash code of the string 'test_string' @@ -115,7 +115,7 @@ def md5me(test_string: str) -> str: bs += format(ord(char), "08b") bs = pad(bs) - tvals = [int(2**32 * abs(math.sin(i + 1))) for i in range(64)] + t_vals = [int(2**32 * abs(math.sin(i + 1))) for i in range(64)] a0 = 0x67452301 b0 = 0xEFCDAB89 @@ -196,37 +196,37 @@ def md5me(test_string: str) -> str: d = d0 for i in range(64): if i <= 15: - # f = (B & C) | (not32(B) & D) + # f = (B & C) | (not_32(B) & D) f = d ^ (b & (c ^ d)) g = i elif i <= 31: - # f = (D & B) | (not32(D) & C) + # f = (D & B) | (not_32(D) & C) f = c ^ (d & (b ^ c)) g = (5 * i + 1) % 16 elif i <= 47: f = b ^ c ^ d g = (3 * i + 5) % 16 else: - f = c ^ (b | not32(d)) + f = c ^ (b | not_32(d)) g = (7 * i) % 16 - dtemp = d + d_temp = d d = c c = b - b = sum32(b, leftrot32((a + f + tvals[i] + m[g]) % 2**32, s[i])) - a = dtemp - a0 = sum32(a0, a) - b0 = sum32(b0, b) - c0 = sum32(c0, c) - d0 = sum32(d0, d) + b = sum_32(b, left_rotate_32((a + f + t_vals[i] + m[g]) % 2**32, s[i])) + a = d_temp + a0 = sum_32(a0, a) + b0 = sum_32(b0, b) + c0 = sum_32(c0, c) + d0 = sum_32(d0, d) digest = reformat_hex(a0) + reformat_hex(b0) + reformat_hex(c0) + reformat_hex(d0) return digest def test() -> None: - assert md5me("") == "d41d8cd98f00b204e9800998ecf8427e" + assert md5_me("") == "d41d8cd98f00b204e9800998ecf8427e" assert ( - md5me("The quick brown fox jumps over the lazy dog") + md5_me("The quick brown fox jumps over the lazy dog") == "9e107d9d372bb6826bd81d3542a419d6" ) print("Success.") From 17fc171af0e22030f735280a5c5d9698e923e631 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 13:52:07 -0800 Subject: [PATCH 17/30] Specify functions imported from math --- hashes/md5.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index f0a48f43ec11..781d9e6b6f5b 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -1,5 +1,5 @@ -import math from collections.abc import Generator +from math import sin def rearrange(bit_string_32: str) -> str: @@ -115,7 +115,7 @@ def md5_me(test_string: str) -> str: bs += format(ord(char), "08b") bs = pad(bs) - t_vals = [int(2**32 * abs(math.sin(i + 1))) for i in range(64)] + t_vals = [int(2**32 * abs(sin(i + 1))) for i in range(64)] a0 = 0x67452301 b0 = 0xEFCDAB89 From 24006762e0ab1fd7a332611f2fcb8f619b023316 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 19:11:19 -0800 Subject: [PATCH 18/30] Rename vars and functions to be more descriptive --- hashes/md5.py | 66 +++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 781d9e6b6f5b..36b069b33be4 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -2,19 +2,19 @@ from math import sin -def rearrange(bit_string_32: str) -> str: +def to_little_endian(bit_string_32: str) -> str: """[summary] - Regroups the given binary string. + Converts the given binary string to little-endian in groups of 8 bits. Arguments: - bitString32 {[string]} -- [32 bit binary] + bit_string_32 {[string]} -- [32 bit binary] Raises: - ValueError -- [if the given string not are 32 bit binary string] + ValueError -- [if the given string not are 32 bit binary string] Returns: [string] -- [32 bit binary string] - >>> rearrange('1234567890abcdfghijklmnopqrstuvw') + >>> to_little_endian('1234567890abcdfghijklmnopqrstuvw') 'pqrstuvwhijklmno90abcdfg12345678' """ @@ -28,7 +28,7 @@ def rearrange(bit_string_32: str) -> str: def reformat_hex(i: int) -> str: """[summary] - Converts the given integer into 8-digit hex number. + Converts the given integer into 8-char hex number. Arguments: i {[int]} -- [integer] @@ -43,7 +43,7 @@ def reformat_hex(i: int) -> str: return thing -def pad(bit_string: str) -> str: +def preprocess(bit_string: str) -> str: """[summary] Fills up the binary string to a 512 bit binary string @@ -58,11 +58,11 @@ def pad(bit_string: str) -> str: while len(bit_string) % 512 != 448: bit_string += "0" last_part = format(start_length, "064b") - bit_string += rearrange(last_part[32:]) + rearrange(last_part[:32]) + bit_string += to_little_endian(last_part[32:]) + to_little_endian(last_part[:32]) return bit_string -def get_block(bit_string: str) -> Generator[list[int], None, None]: +def get_block_words(bit_string: str) -> Generator[list[int], None, None]: """[summary] Iterator: Returns by each call a list of length 16 with the 32-bit @@ -72,14 +72,14 @@ def get_block(bit_string: str) -> Generator[list[int], None, None]: bit_string {[string]} -- [binary string >= 512] """ - curr_pos = 0 - while curr_pos < len(bit_string): - curr_part = bit_string[curr_pos : curr_pos + 512] - my_splits = [] + pos = 0 + while pos < len(bit_string): + block = bit_string[pos : pos + 512] + block_words = [] for i in range(16): - my_splits.append(int(rearrange(curr_part[32 * i : 32 * i + 32]), 2)) - yield my_splits - curr_pos += 512 + block_words.append(int(to_little_endian(block[32 * i : 32 * i + 32]), 2)) + yield block_words + pos += 512 def not_32(i: int) -> int: @@ -98,31 +98,31 @@ def sum_32(a: int, b: int) -> int: return (a + b) % 2**32 -def left_rotate_32(i: int, s: int) -> int: - return (i << s) ^ (i >> (32 - s)) +def left_rotate_32(i: int, shift: int) -> int: + return (i << shift) ^ (i >> (32 - shift)) -def md5_me(test_string: str) -> str: +def md5_me(message: str) -> str: """[summary] - Returns a 32-bit hash code of the string 'test_string' + Returns a 32-bit hash of the string 'message' Arguments: - test_string {[string]} -- [message] + message {[string]} -- [message] """ - bs = "" - for char in test_string: - bs += format(ord(char), "08b") - bs = pad(bs) + bit_string = "" + for char in message: + bit_string += format(ord(char), "08b") + bit_string = preprocess(bit_string) - t_vals = [int(2**32 * abs(sin(i + 1))) for i in range(64)] + added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)] a0 = 0x67452301 b0 = 0xEFCDAB89 c0 = 0x98BADCFE d0 = 0x10325476 - s = [ + shift_amounts = [ 7, 12, 17, @@ -189,18 +189,18 @@ def md5_me(test_string: str) -> str: 21, ] - for m in get_block(bs): + for block_words in get_block_words(bit_string): a = a0 b = b0 c = c0 d = d0 for i in range(64): if i <= 15: - # f = (B & C) | (not_32(B) & D) + # f = (b & c) | (not_32(b) & d) f = d ^ (b & (c ^ d)) g = i elif i <= 31: - # f = (D & B) | (not_32(D) & C) + # f = (d & b) | (not_32(d) & c) f = c ^ (d & (b ^ c)) g = (5 * i + 1) % 16 elif i <= 47: @@ -209,11 +209,11 @@ def md5_me(test_string: str) -> str: else: f = c ^ (b | not_32(d)) g = (7 * i) % 16 - d_temp = d + f = (f + a + added_consts[i] + block_words[g]) % 2**32 + a = d d = c c = b - b = sum_32(b, left_rotate_32((a + f + t_vals[i] + m[g]) % 2**32, s[i])) - a = d_temp + b = sum_32(b, left_rotate_32(f, shift_amounts[i])) a0 = sum_32(a0, a) b0 = sum_32(b0, b) c0 = sum_32(c0, c) From cd501ba8a88ed817c5490181729d4fee481a6a98 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 19:16:38 -0800 Subject: [PATCH 19/30] Make tests from test function into doctests --- hashes/md5.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 36b069b33be4..f5018cad8492 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -108,6 +108,13 @@ def md5_me(message: str) -> str: Arguments: message {[string]} -- [message] + + >>> md5_me("") + 'd41d8cd98f00b204e9800998ecf8427e' + >>> md5_me("The quick brown fox jumps over the lazy dog") + '9e107d9d372bb6826bd81d3542a419d6' + >>> md5_me("The quick brown fox jumps over the lazy dog.") + 'e4d909c290d0fb1ca068ffaddf22cbd0' """ bit_string = "" @@ -223,17 +230,7 @@ def md5_me(message: str) -> str: return digest -def test() -> None: - assert md5_me("") == "d41d8cd98f00b204e9800998ecf8427e" - assert ( - md5_me("The quick brown fox jumps over the lazy dog") - == "9e107d9d372bb6826bd81d3542a419d6" - ) - print("Success.") - - if __name__ == "__main__": - test() import doctest doctest.testmod() From feefe882228ab0d8398b39782c6a0a6263b84f5c Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 19:26:47 -0800 Subject: [PATCH 20/30] Clarify more var names --- hashes/md5.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index f5018cad8492..4b20bfadf367 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -4,7 +4,7 @@ def to_little_endian(bit_string_32: str) -> str: """[summary] - Converts the given binary string to little-endian in groups of 8 bits. + Converts the given binary string to little-endian in groups of 8 chars. Arguments: bit_string_32 {[string]} -- [32 bit binary] @@ -20,27 +20,27 @@ def to_little_endian(bit_string_32: str) -> str: if len(bit_string_32) != 32: raise ValueError("Need length 32") - new_string = "" + little_endian = "" for i in [3, 2, 1, 0]: - new_string += bit_string_32[8 * i : 8 * i + 8] - return new_string + little_endian += bit_string_32[8 * i : 8 * i + 8] + return little_endian def reformat_hex(i: int) -> str: """[summary] - Converts the given integer into 8-char hex number. + Converts the given integer into 8-char little-endian hex number. Arguments: - i {[int]} -- [integer] + i {[int]} -- [integer] >>> reformat_hex(666) '9a020000' """ hex_rep = format(i, "08x") - thing = "" + little_endian_hex = "" for i in [3, 2, 1, 0]: - thing += hex_rep[2 * i : 2 * i + 2] - return thing + little_endian_hex += hex_rep[2 * i : 2 * i + 2] + return little_endian_hex def preprocess(bit_string: str) -> str: @@ -65,11 +65,11 @@ def preprocess(bit_string: str) -> str: def get_block_words(bit_string: str) -> Generator[list[int], None, None]: """[summary] Iterator: - Returns by each call a list of length 16 with the 32-bit - integer blocks. + Returns by each call a list of length 16 with the 32-bit + integer blocks. Arguments: - bit_string {[string]} -- [binary string >= 512] + bit_string {[string]} -- [binary string >= 512] """ pos = 0 @@ -107,7 +107,7 @@ def md5_me(message: str) -> str: Returns a 32-bit hash of the string 'message' Arguments: - message {[string]} -- [message] + message {[string]} -- [message] >>> md5_me("") 'd41d8cd98f00b204e9800998ecf8427e' From 2b7a4650fb205d924f651e08455511c7cbc1a9d9 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 19:36:52 -0800 Subject: [PATCH 21/30] Refactor some MD5 code into preprocess function --- hashes/md5.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 4b20bfadf367..2bf98772a7b9 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -43,22 +43,29 @@ def reformat_hex(i: int) -> str: return little_endian_hex -def preprocess(bit_string: str) -> str: +def preprocess(message: str) -> str: """[summary] - Fills up the binary string to a 512 bit binary string + Preprocesses the message string: + - Convert message to bit string + - Pad bit string to a multiple of 512 bits: + - Append a 1 + - Append 0's until length = 448 (mod 512), 64 bits short of a multiple of 512 + - Append length of original message (64 bits) Arguments: - bit_string {[string]} -- [binary string] + message {[string]} -- [message string] Returns: - [string] -- [binary string] + [string] -- [padded bit string] """ - start_length = len(bit_string) + bit_string = "" + for char in message: + bit_string += format(ord(char), "08b") + start_len = format(len(bit_string), "064b") bit_string += "1" while len(bit_string) % 512 != 448: bit_string += "0" - last_part = format(start_length, "064b") - bit_string += to_little_endian(last_part[32:]) + to_little_endian(last_part[:32]) + bit_string += to_little_endian(start_len[32:]) + to_little_endian(start_len[:32]) return bit_string @@ -117,10 +124,7 @@ def md5_me(message: str) -> str: 'e4d909c290d0fb1ca068ffaddf22cbd0' """ - bit_string = "" - for char in message: - bit_string += format(ord(char), "08b") - bit_string = preprocess(bit_string) + bit_string = preprocess(message) added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)] From 84f7ac3775d10d40eb15dc0b599cfdd8c4506d61 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 31 Dec 2022 19:48:50 -0800 Subject: [PATCH 22/30] Simplify loop indices in get_block_words --- hashes/md5.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 2bf98772a7b9..0aca6a5e34fc 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -79,14 +79,12 @@ def get_block_words(bit_string: str) -> Generator[list[int], None, None]: bit_string {[string]} -- [binary string >= 512] """ - pos = 0 - while pos < len(bit_string): + for pos in range(0, len(bit_string), 512): block = bit_string[pos : pos + 512] block_words = [] - for i in range(16): - block_words.append(int(to_little_endian(block[32 * i : 32 * i + 32]), 2)) + for i in range(0, 512, 32): + block_words.append(int(to_little_endian(block[i : i + 32]), 2)) yield block_words - pos += 512 def not_32(i: int) -> int: From 5fadb6e79b981eee4490dfdc790fedd2f41a06a8 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sun, 1 Jan 2023 00:25:40 -0800 Subject: [PATCH 23/30] Add more detailed comments, docs, and doctests --- hashes/md5.py | 253 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 225 insertions(+), 28 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 0aca6a5e34fc..cc83c5758c30 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -1,42 +1,86 @@ +""" +The MD5 algorithm is a hash function that's commonly used as a checksum to +detect data corruption. The algorithm works by processing a given message in +blocks of 512 bits, padding the message as needed. It uses the blocks to operate +a 128-bit state and performs a total of 64 such operations. Note that all values +are little-endian, so inputs are converted as needed. + +Although MD5 was used as a cryptographic hash function in the past, it's since +been cracked, so it shouldn't be used for security purposes. + +For more info, see https://en.wikipedia.org/wiki/MD5 +""" + from collections.abc import Generator from math import sin -def to_little_endian(bit_string_32: str) -> str: - """[summary] - Converts the given binary string to little-endian in groups of 8 chars. +def to_little_endian(string_32: str) -> str: + """ + Converts the given string to little-endian in groups of 8 chars. Arguments: - bit_string_32 {[string]} -- [32 bit binary] + string_32 {[string]} -- [32-char string] Raises: - ValueError -- [if the given string not are 32 bit binary string] + ValueError -- [input is not 32 char] Returns: - [string] -- [32 bit binary string] + 32-char little-endian string >>> to_little_endian('1234567890abcdfghijklmnopqrstuvw') 'pqrstuvwhijklmno90abcdfg12345678' + >>> to_little_endian('1234567890') + Traceback (most recent call last): + ... + ValueError: Input must be of length 32 """ + if len(string_32) != 32: + raise ValueError("Input must be of length 32") - if len(bit_string_32) != 32: - raise ValueError("Need length 32") little_endian = "" for i in [3, 2, 1, 0]: - little_endian += bit_string_32[8 * i : 8 * i + 8] + little_endian += string_32[8 * i : 8 * i + 8] return little_endian def reformat_hex(i: int) -> str: - """[summary] - Converts the given integer into 8-char little-endian hex number. + """ + Converts the given non-negative integer to hex string. + + Example: Suppose the input is the following: + i = 1234 + + The input is 0x000004d2 in hex, so the little-endian hex string is + "d2040000". Arguments: i {[int]} -- [integer] + + Raises: + ValueError -- [input is negative] + + Returns: + 8-char little-endian hex string + + >>> reformat_hex(1234) + 'd2040000' >>> reformat_hex(666) '9a020000' + >>> reformat_hex(0) + '00000000' + >>> reformat_hex(1234567890) + 'd2029649' + >>> reformat_hex(1234567890987654321) + 'b11c6cb1' + >>> reformat_hex(-1) + Traceback (most recent call last): + ... + ValueError: Input must be non-negative """ + if i < 0: + raise ValueError("Input must be non-negative") - hex_rep = format(i, "08x") + hex_rep = format(i, "08x")[-8:] little_endian_hex = "" for i in [3, 2, 1, 0]: little_endian_hex += hex_rep[2 * i : 2 * i + 2] @@ -44,40 +88,94 @@ def reformat_hex(i: int) -> str: def preprocess(message: str) -> str: - """[summary] + """ Preprocesses the message string: - Convert message to bit string - - Pad bit string to a multiple of 512 bits: + - Pad bit string to a multiple of 512 chars: - Append a 1 - - Append 0's until length = 448 (mod 512), 64 bits short of a multiple of 512 - - Append length of original message (64 bits) + - Append 0's until length = 448 (mod 512) + - Append length of original message (64 chars) + + Example: Suppose the input is the following: + message = "a" + + The message bit string is "01100001", which is 8 bits long. Thus, the + bit string needs 439 bits of padding so that + (bit_string + "1" + padding) = 448 (mod 512). + The message length is "000010000...0" in 64-bit little-endian binary. + The combined bit string is then 512 bits long. Arguments: - message {[string]} -- [message string] + message {[string]} -- [message string] Returns: - [string] -- [padded bit string] + processed bit string padded to a multiple of 512 chars + + >>> preprocess("a") == "01100001" + "1" + ("0" * 439) + "00001000" + ("0" * 56) + True + >>> preprocess("") == "1" + ("0" * 447) + ("0" * 64) + True """ bit_string = "" for char in message: bit_string += format(ord(char), "08b") start_len = format(len(bit_string), "064b") + + # Pad bit_string to a multiple of 512 chars bit_string += "1" while len(bit_string) % 512 != 448: bit_string += "0" bit_string += to_little_endian(start_len[32:]) + to_little_endian(start_len[:32]) + return bit_string def get_block_words(bit_string: str) -> Generator[list[int], None, None]: - """[summary] - Iterator: - Returns by each call a list of length 16 with the 32-bit - integer blocks. + """ + Splits bit string into blocks of 512 chars and yields each block as a list + of 32-bit words + + Example: Suppose the input is the following: + bit_string = + "000000000...0" + # 0x00 (32 bits, padded to the right) + "000000010...0" + # 0x01 (32 bits, padded to the right) + "000000100...0" + # 0x02 (32 bits, padded to the right) + "000000110...0" + # 0x03 (32 bits, padded to the right) + ... + "000011110...0" # 0x0a (32 bits, padded to the right) + + Then len(bit_string) == 512, so there'll be 1 block. The block is split + into 32-bit words, and each word is converted to little endian. The + first word is interpreted as 0 in decimal, the second word is + interpreted as 1 in decimal, etc. + + Thus, block_words == [[0, 1, 2, 3, ..., 15]]. Arguments: - bit_string {[string]} -- [binary string >= 512] + bit_string {[string]} -- [bit string with multiple of 512 as length] + + Raises: + ValueError -- [length of bit string isn't multiple of 512] + + Yields: + a list of 16 32-bit words + + >>> test_string = "".join(format(n << 24, "032b") for n in range(16)) + >>> list(get_block_words(test_string)) + [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]] + >>> list(get_block_words(test_string * 4)) == [list(range(16))] * 4 + True + >>> list(get_block_words("1" * 512)) == [[4294967295] * 16] + True + >>> list(get_block_words("")) + [] + >>> list(get_block_words("1111")) + Traceback (most recent call last): + ... + ValueError: Input must have length that's a multiple of 512 """ + if len(bit_string) % 512 != 0: + raise ValueError("Input must have length that's a multiple of 512") for pos in range(0, len(bit_string), 512): block = bit_string[pos : pos + 512] @@ -89,9 +187,35 @@ def get_block_words(bit_string: str) -> Generator[list[int], None, None]: def not_32(i: int) -> int: """ + Perform bitwise NOT on given int. + + Arguments: + i {[int]} -- [given int] + + Raises: + ValueError -- [input is negative] + + Returns: + Result of bitwise NOT on i + >>> not_32(34) 4294967261 + >>> not_32(1234) + 4294966061 + >>> not_32(4294966061) + 1234 + >>> not_32(0) + 4294967295 + >>> not_32(1) + 4294967294 + >>> not_32(-1) + Traceback (most recent call last): + ... + ValueError: Input must be non-negative """ + if i < 0: + raise ValueError("Input must be non-negative") + i_str = format(i, "032b") new_str = "" for c in i_str: @@ -100,20 +224,86 @@ def not_32(i: int) -> int: def sum_32(a: int, b: int) -> int: + """ + Add two numbers as 32-bit ints. + + Arguments: + a {[int]} -- [first given int] + b {[int]} -- [second given int] + + Returns: + (a + b) as an unsigned 32-bit int + + >>> sum_32(1, 1) + 2 + >>> sum_32(2, 3) + 5 + >>> sum_32(0, 0) + 0 + >>> sum_32(-1, -1) + 4294967294 + >>> sum_32(4294967295, 1) + 0 + """ return (a + b) % 2**32 def left_rotate_32(i: int, shift: int) -> int: - return (i << shift) ^ (i >> (32 - shift)) + """ + Rotate the bits of a given int left by a given amount. + + Arguments: + i {[int]} -- [given int] + shift {[int]} -- [shift amount] + + Raises: + ValueError -- [either given int or shift is negative] + + Returns: + `i` rotated to the left by `shift` bits + + >>> left_rotate_32(1234, 1) + 2468 + >>> left_rotate_32(1111, 4) + 17776 + >>> left_rotate_32(2147483648, 1) + 1 + >>> left_rotate_32(2147483648, 3) + 4 + >>> left_rotate_32(4294967295, 4) + 4294967295 + >>> left_rotate_32(1234, 0) + 1234 + >>> left_rotate_32(0, 0) + 0 + >>> left_rotate_32(-1, 0) + Traceback (most recent call last): + ... + ValueError: Input must be non-negative + >>> left_rotate_32(0, -1) + Traceback (most recent call last): + ... + ValueError: Shift must be non-negative + """ + if i < 0: + raise ValueError("Input must be non-negative") + if shift < 0: + raise ValueError("Shift must be non-negative") + return ((i << shift) ^ (i >> (32 - shift))) % 2**32 def md5_me(message: str) -> str: - """[summary] - Returns a 32-bit hash of the string 'message' + """ + Returns the 32-char MD5 hash of a given message. + + Reference: https://en.wikipedia.org/wiki/MD5#Algorithm Arguments: message {[string]} -- [message] + Returns: + 32-char MD5 hash string + >>> md5_me("") 'd41d8cd98f00b204e9800998ecf8427e' >>> md5_me("The quick brown fox jumps over the lazy dog") @@ -122,10 +312,12 @@ def md5_me(message: str) -> str: 'e4d909c290d0fb1ca068ffaddf22cbd0' """ + # Convert to bit string, add padding and append message length bit_string = preprocess(message) added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)] + # Starting states a0 = 0x67452301 b0 = 0xEFCDAB89 c0 = 0x98BADCFE @@ -198,18 +390,21 @@ def md5_me(message: str) -> str: 21, ] + # Process bit string in chunks, each with 16 32-char words for block_words in get_block_words(bit_string): a = a0 b = b0 c = c0 d = d0 + + # Hash current chunk for i in range(64): if i <= 15: - # f = (b & c) | (not_32(b) & d) + # f = (b & c) | (not_32(b) & d) # Alternate definition for f f = d ^ (b & (c ^ d)) g = i elif i <= 31: - # f = (d & b) | (not_32(d) & c) + # f = (d & b) | (not_32(d) & c) # Alternate definition for f f = c ^ (d & (b ^ c)) g = (5 * i + 1) % 16 elif i <= 47: @@ -223,6 +418,8 @@ def md5_me(message: str) -> str: d = c c = b b = sum_32(b, left_rotate_32(f, shift_amounts[i])) + + # Add hashed chunk to running total a0 = sum_32(a0, a) b0 = sum_32(b0, b) c0 = sum_32(c0, c) From 24457b9e9095fb28af722bdff50e509cd47c5d6c Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Mon, 2 Jan 2023 01:57:13 +0000 Subject: [PATCH 24/30] updating DIRECTORY.md --- DIRECTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index 1d3177801a2c..35fdd5b089fa 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -1175,6 +1175,7 @@ * [Get Amazon Product Data](web_programming/get_amazon_product_data.py) * [Get Imdb Top 250 Movies Csv](web_programming/get_imdb_top_250_movies_csv.py) * [Get Imdbtop](web_programming/get_imdbtop.py) + * [Get Top Billionaires](web_programming/get_top_billionaires.py) * [Get Top Hn Posts](web_programming/get_top_hn_posts.py) * [Get User Tweets](web_programming/get_user_tweets.py) * [Giphy](web_programming/giphy.py) From 61d7761a86bf6d888761921c3da24e9f154a2e84 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Sun, 26 Mar 2023 14:39:42 +0000 Subject: [PATCH 25/30] updating DIRECTORY.md --- DIRECTORY.md | 1 - 1 file changed, 1 deletion(-) diff --git a/DIRECTORY.md b/DIRECTORY.md index 35fdd5b089fa..1d3177801a2c 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -1175,7 +1175,6 @@ * [Get Amazon Product Data](web_programming/get_amazon_product_data.py) * [Get Imdb Top 250 Movies Csv](web_programming/get_imdb_top_250_movies_csv.py) * [Get Imdbtop](web_programming/get_imdbtop.py) - * [Get Top Billionaires](web_programming/get_top_billionaires.py) * [Get Top Hn Posts](web_programming/get_top_hn_posts.py) * [Get User Tweets](web_programming/get_user_tweets.py) * [Giphy](web_programming/giphy.py) From aa1a18f276eeff3b249d84e19c23d9f835bedd07 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Sat, 1 Apr 2023 06:39:23 +0000 Subject: [PATCH 26/30] updating DIRECTORY.md --- DIRECTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index 1a641d8ecb59..33c816fc4add 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -317,6 +317,7 @@ * [Longest Sub Array](dynamic_programming/longest_sub_array.py) * [Matrix Chain Order](dynamic_programming/matrix_chain_order.py) * [Max Non Adjacent Sum](dynamic_programming/max_non_adjacent_sum.py) + * [Max Product Subarray](dynamic_programming/max_product_subarray.py) * [Max Sub Array](dynamic_programming/max_sub_array.py) * [Max Sum Contiguous Subsequence](dynamic_programming/max_sum_contiguous_subsequence.py) * [Min Distance Up Bottom](dynamic_programming/min_distance_up_bottom.py) From c71f64a789595535fd3aaea70c9ef963f23744a6 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Sat, 1 Apr 2023 16:49:32 +0000 Subject: [PATCH 27/30] updating DIRECTORY.md --- DIRECTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index c781b17bf05f..588d0b1e542e 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -715,6 +715,7 @@ * [Archimedes Principle](physics/archimedes_principle.py) * [Casimir Effect](physics/casimir_effect.py) * [Centripetal Force](physics/centripetal_force.py) + * [Grahams Law](physics/grahams_law.py) * [Horizontal Projectile Motion](physics/horizontal_projectile_motion.py) * [Hubble Parameter](physics/hubble_parameter.py) * [Ideal Gas Law](physics/ideal_gas_law.py) From 17c76bac97a32904fd30ba21f8d163b314f0b521 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 1 Apr 2023 13:17:49 -0400 Subject: [PATCH 28/30] Convert str types to bytes --- hashes/md5.py | 68 ++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index cc83c5758c30..0a5cf3ad7569 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -15,7 +15,7 @@ from math import sin -def to_little_endian(string_32: str) -> str: +def to_little_endian(string_32: bytes) -> bytes: """ Converts the given string to little-endian in groups of 8 chars. @@ -27,9 +27,9 @@ def to_little_endian(string_32: str) -> str: Returns: 32-char little-endian string - >>> to_little_endian('1234567890abcdfghijklmnopqrstuvw') - 'pqrstuvwhijklmno90abcdfg12345678' - >>> to_little_endian('1234567890') + >>> to_little_endian(b'1234567890abcdfghijklmnopqrstuvw') + b'pqrstuvwhijklmno90abcdfg12345678' + >>> to_little_endian(b'1234567890') Traceback (most recent call last): ... ValueError: Input must be of length 32 @@ -37,13 +37,13 @@ def to_little_endian(string_32: str) -> str: if len(string_32) != 32: raise ValueError("Input must be of length 32") - little_endian = "" + little_endian = b"" for i in [3, 2, 1, 0]: little_endian += string_32[8 * i : 8 * i + 8] return little_endian -def reformat_hex(i: int) -> str: +def reformat_hex(i: int) -> bytes: """ Converts the given non-negative integer to hex string. @@ -63,15 +63,15 @@ def reformat_hex(i: int) -> str: 8-char little-endian hex string >>> reformat_hex(1234) - 'd2040000' + b'd2040000' >>> reformat_hex(666) - '9a020000' + b'9a020000' >>> reformat_hex(0) - '00000000' + b'00000000' >>> reformat_hex(1234567890) - 'd2029649' + b'd2029649' >>> reformat_hex(1234567890987654321) - 'b11c6cb1' + b'b11c6cb1' >>> reformat_hex(-1) Traceback (most recent call last): ... @@ -81,13 +81,13 @@ def reformat_hex(i: int) -> str: raise ValueError("Input must be non-negative") hex_rep = format(i, "08x")[-8:] - little_endian_hex = "" + little_endian_hex = b"" for i in [3, 2, 1, 0]: - little_endian_hex += hex_rep[2 * i : 2 * i + 2] + little_endian_hex += hex_rep[2 * i : 2 * i + 2].encode("utf-8") return little_endian_hex -def preprocess(message: str) -> str: +def preprocess(message: bytes) -> bytes: """ Preprocesses the message string: - Convert message to bit string @@ -111,26 +111,27 @@ def preprocess(message: str) -> str: Returns: processed bit string padded to a multiple of 512 chars - >>> preprocess("a") == "01100001" + "1" + ("0" * 439) + "00001000" + ("0" * 56) + >>> preprocess(b"a") == b"01100001" + b"1" + (b"0" * 439) + b"00001000" + \ + (b"0" * 56) True - >>> preprocess("") == "1" + ("0" * 447) + ("0" * 64) + >>> preprocess(b"") == b"1" + (b"0" * 447) + (b"0" * 64) True """ - bit_string = "" + bit_string = b"" for char in message: - bit_string += format(ord(char), "08b") - start_len = format(len(bit_string), "064b") + bit_string += format(char, "08b").encode("utf-8") + start_len = format(len(bit_string), "064b").encode("utf-8") # Pad bit_string to a multiple of 512 chars - bit_string += "1" + bit_string += b"1" while len(bit_string) % 512 != 448: - bit_string += "0" + bit_string += b"0" bit_string += to_little_endian(start_len[32:]) + to_little_endian(start_len[:32]) return bit_string -def get_block_words(bit_string: str) -> Generator[list[int], None, None]: +def get_block_words(bit_string: bytes) -> Generator[list[int], None, None]: """ Splits bit string into blocks of 512 chars and yields each block as a list of 32-bit words @@ -160,16 +161,17 @@ def get_block_words(bit_string: str) -> Generator[list[int], None, None]: Yields: a list of 16 32-bit words - >>> test_string = "".join(format(n << 24, "032b") for n in range(16)) + >>> test_string = "".join( \ + format(n << 24, "032b") for n in range(16)).encode("utf-8") >>> list(get_block_words(test_string)) [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]] >>> list(get_block_words(test_string * 4)) == [list(range(16))] * 4 True - >>> list(get_block_words("1" * 512)) == [[4294967295] * 16] + >>> list(get_block_words(b"1" * 512)) == [[4294967295] * 16] True - >>> list(get_block_words("")) + >>> list(get_block_words(b"")) [] - >>> list(get_block_words("1111")) + >>> list(get_block_words(b"1111")) Traceback (most recent call last): ... ValueError: Input must have length that's a multiple of 512 @@ -292,7 +294,7 @@ def left_rotate_32(i: int, shift: int) -> int: return ((i << shift) ^ (i >> (32 - shift))) % 2**32 -def md5_me(message: str) -> str: +def md5_me(message: bytes) -> bytes: """ Returns the 32-char MD5 hash of a given message. @@ -304,12 +306,12 @@ def md5_me(message: str) -> str: Returns: 32-char MD5 hash string - >>> md5_me("") - 'd41d8cd98f00b204e9800998ecf8427e' - >>> md5_me("The quick brown fox jumps over the lazy dog") - '9e107d9d372bb6826bd81d3542a419d6' - >>> md5_me("The quick brown fox jumps over the lazy dog.") - 'e4d909c290d0fb1ca068ffaddf22cbd0' + >>> md5_me(b"") + b'd41d8cd98f00b204e9800998ecf8427e' + >>> md5_me(b"The quick brown fox jumps over the lazy dog") + b'9e107d9d372bb6826bd81d3542a419d6' + >>> md5_me(b"The quick brown fox jumps over the lazy dog.") + b'e4d909c290d0fb1ca068ffaddf22cbd0' """ # Convert to bit string, add padding and append message length From c775f15f992430ee2f8beeacdb4199fc984670e9 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 1 Apr 2023 13:25:32 -0400 Subject: [PATCH 29/30] Add tests comparing md5_me to hashlib's md5 --- hashes/md5.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 0a5cf3ad7569..03d345e15726 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -112,7 +112,7 @@ def preprocess(message: bytes) -> bytes: processed bit string padded to a multiple of 512 chars >>> preprocess(b"a") == b"01100001" + b"1" + (b"0" * 439) + b"00001000" + \ - (b"0" * 56) + (b"0" * 56) True >>> preprocess(b"") == b"1" + (b"0" * 447) + (b"0" * 64) True @@ -161,8 +161,8 @@ def get_block_words(bit_string: bytes) -> Generator[list[int], None, None]: Yields: a list of 16 32-bit words - >>> test_string = "".join( \ - format(n << 24, "032b") for n in range(16)).encode("utf-8") + >>> test_string = "".join(format(n << 24, "032b") for n in range(16)) \ + .encode("utf-8") >>> list(get_block_words(test_string)) [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]] >>> list(get_block_words(test_string * 4)) == [list(range(16))] * 4 @@ -312,6 +312,13 @@ def md5_me(message: bytes) -> bytes: b'9e107d9d372bb6826bd81d3542a419d6' >>> md5_me(b"The quick brown fox jumps over the lazy dog.") b'e4d909c290d0fb1ca068ffaddf22cbd0' + + >>> import hashlib + >>> from string import ascii_letters + >>> msgs = [b"", ascii_letters.encode("utf-8"), "Üñîçø∂é".encode("utf-8"), + ... b"The quick brown fox jumps over the lazy dog."] + >>> all(md5_me(msg) == hashlib.md5(msg).hexdigest().encode("utf-8") for msg in msgs) + True """ # Convert to bit string, add padding and append message length From 1f3842e5f3be5160ba6f7298a3c89d3d5a956eab Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 1 Apr 2023 14:14:21 -0400 Subject: [PATCH 30/30] Replace line-break backslashes with parentheses --- hashes/md5.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hashes/md5.py b/hashes/md5.py index 03d345e15726..2187006ec8a9 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -111,8 +111,8 @@ def preprocess(message: bytes) -> bytes: Returns: processed bit string padded to a multiple of 512 chars - >>> preprocess(b"a") == b"01100001" + b"1" + (b"0" * 439) + b"00001000" + \ - (b"0" * 56) + >>> preprocess(b"a") == (b"01100001" + b"1" + + ... (b"0" * 439) + b"00001000" + (b"0" * 56)) True >>> preprocess(b"") == b"1" + (b"0" * 447) + (b"0" * 64) True @@ -161,8 +161,8 @@ def get_block_words(bit_string: bytes) -> Generator[list[int], None, None]: Yields: a list of 16 32-bit words - >>> test_string = "".join(format(n << 24, "032b") for n in range(16)) \ - .encode("utf-8") + >>> test_string = ("".join(format(n << 24, "032b") for n in range(16)) + ... .encode("utf-8")) >>> list(get_block_words(test_string)) [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]] >>> list(get_block_words(test_string * 4)) == [list(range(16))] * 4