From 99237e6e9ad9331269d247b0c43f535c006e072b Mon Sep 17 00:00:00 2001 From: Chong Yeol Nah Date: Tue, 16 May 2023 05:21:37 -0700 Subject: [PATCH] MSVC x64 support Hashes implemented in MASM inspired by Project Nayuki Requires Build Tools for Visual Studio to cargo build https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=BuildTools&rel=17 Fixes #17 --- README.md | 4 +- md5/build.rs | 5 +- md5/src/x64_masm.asm | 160 +++++++++++++++++++++ sha1/build.rs | 5 +- sha1/src/x64_masm.asm | 231 ++++++++++++++++++++++++++++++ sha2/build.rs | 5 +- sha2/src/sha256_x64_masm.asm | 247 ++++++++++++++++++++++++++++++++ sha2/src/sha512_x64_masm.asm | 265 +++++++++++++++++++++++++++++++++++ whirlpool/build.rs | 5 +- 9 files changed, 920 insertions(+), 7 deletions(-) create mode 100644 md5/src/x64_masm.asm create mode 100644 sha1/src/x64_masm.asm create mode 100644 sha2/src/sha256_x64_masm.asm create mode 100644 sha2/src/sha512_x64_masm.asm diff --git a/README.md b/README.md index 75223f2..6cd4a4d 100644 --- a/README.md +++ b/README.md @@ -21,11 +21,9 @@ For more information, see [#45]. All crates are tested on the following platforms: - Linux (32-bit and 64-bit x86) -- Windows (64-bit x86, GNU only) +- Windows (64-bit x86) - ARM64 (except `md5`, which is x86 only) -Windows MSVC builds are known to be broken. See [#17]. - ## Minimum Supported Rust Version All crates in this repository support **Rust 1.43** or higher. 
diff --git a/md5/build.rs b/md5/build.rs index b6376d0..e80d976 100644 --- a/md5/build.rs +++ b/md5/build.rs @@ -1,10 +1,13 @@ fn main() { let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); + let target_env = std::env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default(); let asm_path = if target_arch == "x86" { "src/x86.S" - } else if target_arch == "x86_64" { + } else if target_arch == "x86_64" && target_env != "msvc" { "src/x64.S" + } else if target_arch == "x86_64" && target_env == "msvc" { // MASM: MSVC assembler only + "src/x64_masm.asm" } else { panic!("Unsupported target architecture"); }; diff --git a/md5/src/x64_masm.asm b/md5/src/x64_masm.asm new file mode 100644 index 0000000..9d41988 --- /dev/null +++ b/md5/src/x64_masm.asm @@ -0,0 +1,160 @@ +; +; MD5 hash in x64 MASM +; +; Copyright (c) 2023 Chong Yeol Nah (MIT License) +; +; Permission is hereby granted, free of charge, to any person obtaining a copy of +; this software and associated documentation files (the "Software"), to deal in +; the Software without restriction, including without limitation the rights to +; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +; the Software, and to permit persons to whom the Software is furnished to do so, +; subject to the following conditions: +; - The above copyright notice and this permission notice shall be included in +; all copies or substantial portions of the Software. +; - The Software is provided "as is", without warranty of any kind, express or +; implied, including but not limited to the warranties of merchantability, +; fitness for a particular purpose and noninfringement. In no event shall the +; authors or copyright holders be liable for any claim, damages or other +; liability, whether in an action of contract, tort or otherwise, arising from, +; out of or in connection with the Software or the use or other dealings in the +; Software. 
+; +; +; Storage usage: +; Bytes Location Volatile Description +; 4 eax yes Temporary 32-bit word used in the hash computation +; 8 rcx yes Base address of message block array argument (register is never modified) +; 8 rdx yes Base address of hash value array argument (register is never modified; the array is updated in place) +; 4 r8d yes MD5 working variable A +; 4 r9d yes MD5 working variable B +; 4 r10d yes MD5 working variable C +; 4 r11d yes MD5 working variable D + + option casemap:none + + .const +ROUND macro i, a, b, c, d, k, s, t + +if i LT 16 + + ; eax = F(b,c,d) = (b & c) | (!b & d) = d ^ (b & (c ^ d)) + mov eax, c + xor eax, d + and eax, b + xor eax, d + +elseif i LT 32 + + ; eax = G(b,c,d) = (b & d) | (c & !d) = c ^ (d & (b ^ c)) + mov eax, c + xor eax, b + and eax, d + xor eax, c + +elseif i LT 48 + + ; eax = H(b,c,d) = b ^ c ^ d + mov eax, c + xor eax, d + xor eax, b + +else + + ; eax = I(b,c,d) = c ^ (b | !d) + mov eax, d + not eax + or eax, b + xor eax, c + +endif + + lea a, [eax + a + t] + add a, [rcx + k*4] + rol a, s + add a, b + endm + + .code + ; void md5_compress(const uint8_t block[64], uint32_t state[4]) -- rcx = block, rdx = state; NOTE(review): confirm the Rust extern declaration uses this argument order + public md5_compress +md5_compress proc + ; Load the four state words from [rdx] into the working variables + mov r8d, [rdx] ; a + mov r9d, [rdx + 4] ; b + mov r10d, [rdx + 8] ; c + mov r11d, [rdx + 12] ; d + + ; 64 rounds of hashing (ROUND arguments: round index, a, b, c, d, message word index, rotate count, additive constant) + ROUND 0, r8d, r9d, r10d, r11d, 0, 7, -28955B88h + ROUND 1, r11d, r8d, r9d, r10d, 1, 12, -173848AAh + ROUND 2, r10d, r11d, r8d, r9d, 2, 17, 242070DBh + ROUND 3, r9d, r10d, r11d, r8d, 3, 22, -3E423112h + ROUND 4, r8d, r9d, r10d, r11d, 4, 7, -0A83F051h + ROUND 5, r11d, r8d, r9d, r10d, 5, 12, 4787C62Ah + ROUND 6, r10d, r11d, r8d, r9d, 6, 17, -57CFB9EDh + ROUND 7, r9d, r10d, r11d, r8d, 7, 22, -02B96AFFh + ROUND 8, r8d, r9d, r10d, r11d, 8, 7, 698098D8h + ROUND 9, r11d, r8d, r9d, r10d, 9, 12, -74BB0851h + ROUND 10, r10d, r11d, r8d, r9d, 10, 17, -0000A44Fh + ROUND 11, r9d, r10d, r11d, r8d, 11, 22, -76A32842h + ROUND 12, r8d, r9d, r10d, r11d, 12, 7, 6B901122h + ROUND 13, r11d, r8d, r9d, 
r10d, 13, 12, -02678E6Dh + ROUND 14, r10d, r11d, r8d, r9d, 14, 17, -5986BC72h + ROUND 15, r9d, r10d, r11d, r8d, 15, 22, 49B40821h + ROUND 16, r8d, r9d, r10d, r11d, 1, 5, -09E1DA9Eh + ROUND 17, r11d, r8d, r9d, r10d, 6, 9, -3FBF4CC0h + ROUND 18, r10d, r11d, r8d, r9d, 11, 14, 265E5A51h + ROUND 19, r9d, r10d, r11d, r8d, 0, 20, -16493856h + ROUND 20, r8d, r9d, r10d, r11d, 5, 5, -29D0EFA3h + ROUND 21, r11d, r8d, r9d, r10d, 10, 9, 02441453h + ROUND 22, r10d, r11d, r8d, r9d, 15, 14, -275E197Fh + ROUND 23, r9d, r10d, r11d, r8d, 4, 20, -182C0438h + ROUND 24, r8d, r9d, r10d, r11d, 9, 5, 21E1CDE6h + ROUND 25, r11d, r8d, r9d, r10d, 14, 9, -3CC8F82Ah + ROUND 26, r10d, r11d, r8d, r9d, 3, 14, -0B2AF279h + ROUND 27, r9d, r10d, r11d, r8d, 8, 20, 455A14EDh + ROUND 28, r8d, r9d, r10d, r11d, 13, 5, -561C16FBh + ROUND 29, r11d, r8d, r9d, r10d, 2, 9, -03105C08h + ROUND 30, r10d, r11d, r8d, r9d, 7, 14, 676F02D9h + ROUND 31, r9d, r10d, r11d, r8d, 12, 20, -72D5B376h + ROUND 32, r8d, r9d, r10d, r11d, 5, 4, -0005C6BEh + ROUND 33, r11d, r8d, r9d, r10d, 8, 11, -788E097Fh + ROUND 34, r10d, r11d, r8d, r9d, 11, 16, 6D9D6122h + ROUND 35, r9d, r10d, r11d, r8d, 14, 23, -021AC7F4h + ROUND 36, r8d, r9d, r10d, r11d, 1, 4, -5B4115BCh + ROUND 37, r11d, r8d, r9d, r10d, 4, 11, 4BDECFA9h + ROUND 38, r10d, r11d, r8d, r9d, 7, 16, -0944B4A0h + ROUND 39, r9d, r10d, r11d, r8d, 10, 23, -41404390h + ROUND 40, r8d, r9d, r10d, r11d, 13, 4, 289B7EC6h + ROUND 41, r11d, r8d, r9d, r10d, 0, 11, -155ED806h + ROUND 42, r10d, r11d, r8d, r9d, 3, 16, -2B10CF7Bh + ROUND 43, r9d, r10d, r11d, r8d, 6, 23, 04881D05h + ROUND 44, r8d, r9d, r10d, r11d, 9, 4, -262B2FC7h + ROUND 45, r11d, r8d, r9d, r10d, 12, 11, -1924661Bh + ROUND 46, r10d, r11d, r8d, r9d, 15, 16, 1FA27CF8h + ROUND 47, r9d, r10d, r11d, r8d, 2, 23, -3B53A99Bh + ROUND 48, r8d, r9d, r10d, r11d, 0, 6, -0BD6DDBCh + ROUND 49, r11d, r8d, r9d, r10d, 7, 10, 432AFF97h + ROUND 50, r10d, r11d, r8d, r9d, 14, 15, -546BDC59h + ROUND 51, r9d, r10d, r11d, r8d, 5, 21, -036C5FC7h + ROUND 
52, r8d, r9d, r10d, r11d, 12, 6, 655B59C3h + ROUND 53, r11d, r8d, r9d, r10d, 3, 10, -70F3336Eh + ROUND 54, r10d, r11d, r8d, r9d, 10, 15, -00100B83h + ROUND 55, r9d, r10d, r11d, r8d, 1, 21, -7A7BA22Fh + ROUND 56, r8d, r9d, r10d, r11d, 8, 6, 6FA87E4Fh + ROUND 57, r11d, r8d, r9d, r10d, 15, 10, -01D31920h + ROUND 58, r10d, r11d, r8d, r9d, 6, 15, -5CFEBCECh + ROUND 59, r9d, r10d, r11d, r8d, 13, 21, 4E0811A1h + ROUND 60, r8d, r9d, r10d, r11d, 4, 6, -08AC817Eh + ROUND 61, r11d, r8d, r9d, r10d, 11, 10, -42C50DCBh + ROUND 62, r10d, r11d, r8d, r9d, 2, 15, 2AD7D2BBh + ROUND 63, r9d, r10d, r11d, r8d, 9, 21, -14792C6Fh + + ; Compute intermediate hash value + add [rdx] , r8d + add [rdx + 4], r9d + add [rdx + 8], r10d + add [rdx + 12], r11d + ret +md5_compress endp + end diff --git a/sha1/build.rs b/sha1/build.rs index afed737..b97e0bd 100644 --- a/sha1/build.rs +++ b/sha1/build.rs @@ -1,11 +1,14 @@ fn main() { let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); let target_vendor = std::env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_default(); + let target_env = std::env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default(); let asm_path = if target_arch == "x86" { "src/x86.S" - } else if target_arch == "x86_64" { + } else if target_arch == "x86_64" && target_env != "msvc" { "src/x64.S" + } else if target_arch == "x86_64" && target_env == "msvc" { // MASM: MSVC assembler only + "src/x64_masm.asm" } else if target_arch == "aarch64" && target_vendor == "apple" { "src/aarch64_apple.S" } else if target_arch == "aarch64" { diff --git a/sha1/src/x64_masm.asm b/sha1/src/x64_masm.asm new file mode 100644 index 0000000..92032da --- /dev/null +++ b/sha1/src/x64_masm.asm @@ -0,0 +1,231 @@ +; +; SHA1 hash in x64 MASM +; +; Copyright (c) 2023 Chong Yeol Nah (MIT License) +; +; Permission is hereby granted, free of charge, to any person obtaining a copy of +; this software and associated documentation files (the "Software"), to deal in +; the Software without restriction, including 
without limitation the rights to +; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +; the Software, and to permit persons to whom the Software is furnished to do so, +; subject to the following conditions: +; - The above copyright notice and this permission notice shall be included in +; all copies or substantial portions of the Software. +; - The Software is provided "as is", without warranty of any kind, express or +; implied, including but not limited to the warranties of merchantability, +; fitness for a particular purpose and noninfringement. In no event shall the +; authors or copyright holders be liable for any claim, damages or other +; liability, whether in an action of contract, tort or otherwise, arising from, +; out of or in connection with the Software or the use or other dealings in the +; Software. +; +; +; Storage usage: +; Bytes Location Volatile Description +; 4 eax yes Temporary 32-bit word used in the hash computation +; 4 ebx no Temporary 32-bit word used in the hash computation +; 8 rcx yes Base address of message block array argument (register is never modified) +; 8 rdx yes Base address of hash value array argument (register is never modified; the array is updated in place) +; 8 rsp no x64 stack pointer +; 4 r8d yes SHA1 working variable A +; 4 r9d yes SHA1 working variable B +; 4 r10d yes SHA1 working variable C +; 4 r11d yes SHA1 working variable D +; 4 r12d no SHA1 working variable E +; 64 [rsp+0] no Circular buffer of most recent 16 message schedule items, 4 bytes each + + option casemap:none + + .const +SCHED macro i + index textequ %i AND 0fh ; i mod 16 + exitm <[rsp + index*4]> + endm + +ROUNDTAIL macro a, b, e, k ; on entry: eax = f[i] and e already holds e + w[i] + ; (obj1) e -> a rol 5 + f[i] + e + w[i] + k[i] + ; (obj2) b -> b rol 30 + mov ebx, a + rol ebx, 5 + lea e, [ebx + e + k] ; e -> a rol 5 + e + w[i] + k[i] + add e, eax ; e -> a rol 5 + f[i] + e + w[i] + k[i] (obj1) + rol b, 30 ; b -> b rol 30 (obj2) + endm + +ROUND macro i, a, b, c, d, e + +if i LT 16 + + mov eax, [rcx + i*4] + 
bswap eax + +else + + mov eax, SCHED(i - 3) + xor eax, SCHED(i - 8) + xor eax, SCHED(i - 14) + xor eax, SCHED(i - 16) + rol eax, 1 + +endif + + mov SCHED(i), eax + add e, eax ; e -> e + w[i] + +if i LT 20 + + ; eax = f[i] = (b & c) ^ (~b & d) = d ^ (b & (c ^ d)) + ; & and ^ form the Z/2Z ring (& is *, ^ is +) + ; ~b is (1 + b) + ; bc + (1 + b)d = bc + d + bd = d + b(c + d) + mov eax, c + xor eax, d + and eax, b + xor eax, d + ROUNDTAIL a, b, e, 5A827999h + +elseif i GE 40 AND i LT 60 + + ; eax = f[i] = (b & c) ^ (b & d) ^ (c & d) = (b & (c | d)) | (c & d) + ; https://www.wolframalpha.com/input?i=simplify+%28b+%26%26+c%29+xor+%28b+%26%26+d%29+xor+%28c+%26%26+d%29 + mov eax, c + mov ebx, c + or eax, d + and eax, b + and ebx, d + or eax, ebx + ROUNDTAIL a, b, e, -70E44324h + +else + + ; eax = f[i] = b ^ c ^ d + mov eax, b + xor eax, c + xor eax, d + + if i LT 40 + + ROUNDTAIL a, b, e, 6ED9EBA1h + + else + + ROUNDTAIL a, b, e, -359D3E2Ah + + endif + +endif + + endm + + .code + ; void sha1_compress(const uint8_t block[64], uint32_t state[5]) -- rcx = block, rdx = state; NOTE(review): confirm the Rust extern declaration uses this argument order + public sha1_compress +sha1_compress proc + ; Save nonvolatile registers rbx and r12, allocate 64 bytes of stack for the message schedule buffer (NOTE(review): no unwind directives accompany this prologue -- confirm that is acceptable) + push rbx + push r12 + sub rsp, 64 + + ; Load the five state words from [rdx] into the working variables + mov r8d, [rdx] ; a + mov r9d, [rdx + 4] ; b + mov r10d, [rdx + 8] ; c + mov r11d, [rdx + 12] ; d + mov r12d, [rdx + 16] ; e + + ; 80 rounds of hashing (ROUND arguments: round index, then a, b, c, d, e rotated one position per round) + ROUND 0, r8d, r9d, r10d, r11d, r12d + ROUND 1, r12d, r8d, r9d, r10d, r11d + ROUND 2, r11d, r12d, r8d, r9d, r10d + ROUND 3, r10d, r11d, r12d, r8d, r9d + ROUND 4, r9d, r10d, r11d, r12d, r8d + ROUND 5, r8d, r9d, r10d, r11d, r12d + ROUND 6, r12d, r8d, r9d, r10d, r11d + ROUND 7, r11d, r12d, r8d, r9d, r10d + ROUND 8, r10d, r11d, r12d, r8d, r9d + ROUND 9, r9d, r10d, r11d, r12d, r8d + ROUND 10, r8d, r9d, r10d, r11d, r12d + ROUND 11, r12d, r8d, r9d, r10d, r11d + ROUND 12, r11d, r12d, r8d, r9d, r10d + ROUND 13, r10d, r11d, r12d, r8d, r9d + ROUND 14, r9d, r10d, r11d, r12d, r8d + ROUND 15, r8d, r9d, 
r10d, r11d, r12d + ROUND 16, r12d, r8d, r9d, r10d, r11d + ROUND 17, r11d, r12d, r8d, r9d, r10d + ROUND 18, r10d, r11d, r12d, r8d, r9d + ROUND 19, r9d, r10d, r11d, r12d, r8d + ROUND 20, r8d, r9d, r10d, r11d, r12d + ROUND 21, r12d, r8d, r9d, r10d, r11d + ROUND 22, r11d, r12d, r8d, r9d, r10d + ROUND 23, r10d, r11d, r12d, r8d, r9d + ROUND 24, r9d, r10d, r11d, r12d, r8d + ROUND 25, r8d, r9d, r10d, r11d, r12d + ROUND 26, r12d, r8d, r9d, r10d, r11d + ROUND 27, r11d, r12d, r8d, r9d, r10d + ROUND 28, r10d, r11d, r12d, r8d, r9d + ROUND 29, r9d, r10d, r11d, r12d, r8d + ROUND 30, r8d, r9d, r10d, r11d, r12d + ROUND 31, r12d, r8d, r9d, r10d, r11d + ROUND 32, r11d, r12d, r8d, r9d, r10d + ROUND 33, r10d, r11d, r12d, r8d, r9d + ROUND 34, r9d, r10d, r11d, r12d, r8d + ROUND 35, r8d, r9d, r10d, r11d, r12d + ROUND 36, r12d, r8d, r9d, r10d, r11d + ROUND 37, r11d, r12d, r8d, r9d, r10d + ROUND 38, r10d, r11d, r12d, r8d, r9d + ROUND 39, r9d, r10d, r11d, r12d, r8d + ROUND 40, r8d, r9d, r10d, r11d, r12d + ROUND 41, r12d, r8d, r9d, r10d, r11d + ROUND 42, r11d, r12d, r8d, r9d, r10d + ROUND 43, r10d, r11d, r12d, r8d, r9d + ROUND 44, r9d, r10d, r11d, r12d, r8d + ROUND 45, r8d, r9d, r10d, r11d, r12d + ROUND 46, r12d, r8d, r9d, r10d, r11d + ROUND 47, r11d, r12d, r8d, r9d, r10d + ROUND 48, r10d, r11d, r12d, r8d, r9d + ROUND 49, r9d, r10d, r11d, r12d, r8d + ROUND 50, r8d, r9d, r10d, r11d, r12d + ROUND 51, r12d, r8d, r9d, r10d, r11d + ROUND 52, r11d, r12d, r8d, r9d, r10d + ROUND 53, r10d, r11d, r12d, r8d, r9d + ROUND 54, r9d, r10d, r11d, r12d, r8d + ROUND 55, r8d, r9d, r10d, r11d, r12d + ROUND 56, r12d, r8d, r9d, r10d, r11d + ROUND 57, r11d, r12d, r8d, r9d, r10d + ROUND 58, r10d, r11d, r12d, r8d, r9d + ROUND 59, r9d, r10d, r11d, r12d, r8d + ROUND 60, r8d, r9d, r10d, r11d, r12d + ROUND 61, r12d, r8d, r9d, r10d, r11d + ROUND 62, r11d, r12d, r8d, r9d, r10d + ROUND 63, r10d, r11d, r12d, r8d, r9d + ROUND 64, r9d, r10d, r11d, r12d, r8d + ROUND 65, r8d, r9d, r10d, r11d, r12d + ROUND 66, r12d, r8d, r9d, 
r10d, r11d + ROUND 67, r11d, r12d, r8d, r9d, r10d + ROUND 68, r10d, r11d, r12d, r8d, r9d + ROUND 69, r9d, r10d, r11d, r12d, r8d + ROUND 70, r8d, r9d, r10d, r11d, r12d + ROUND 71, r12d, r8d, r9d, r10d, r11d + ROUND 72, r11d, r12d, r8d, r9d, r10d + ROUND 73, r10d, r11d, r12d, r8d, r9d + ROUND 74, r9d, r10d, r11d, r12d, r8d + ROUND 75, r8d, r9d, r10d, r11d, r12d + ROUND 76, r12d, r8d, r9d, r10d, r11d + ROUND 77, r11d, r12d, r8d, r9d, r10d + ROUND 78, r10d, r11d, r12d, r8d, r9d + ROUND 79, r9d, r10d, r11d, r12d, r8d + + ; Compute intermediate hash value + add [rdx] , r8d + add [rdx + 4], r9d + add [rdx + 8], r10d + add [rdx + 12], r11d + add [rdx + 16], r12d + + ; Restore nonvolatile registers + add rsp, 64 + pop r12 + pop rbx + ret +sha1_compress endp + end diff --git a/sha2/build.rs b/sha2/build.rs index 4fd331f..66689d3 100644 --- a/sha2/build.rs +++ b/sha2/build.rs @@ -3,12 +3,15 @@ fn main() { let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_default(); + let target_env = env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default(); let mut build256 = cc::Build::new(); let (sha256_path, sha512_path) = if target_arch == "x86" { ("src/sha256_x86.S", "src/sha512_x86.S") - } else if target_arch == "x86_64" { + } else if target_arch == "x86_64" && target_env != "msvc" { ("src/sha256_x64.S", "src/sha512_x64.S") + } else if target_arch == "x86_64" && target_env == "msvc" { // MASM: MSVC assembler only + ("src/sha256_x64_masm.asm", "src/sha512_x64_masm.asm") } else if target_arch == "aarch64" && target_vendor == "apple" { build256.flag("-march=armv8-a+crypto"); ("src/sha256_aarch64_apple.S", "") diff --git a/sha2/src/sha256_x64_masm.asm b/sha2/src/sha256_x64_masm.asm new file mode 100644 index 0000000..6a3d94c --- /dev/null +++ b/sha2/src/sha256_x64_masm.asm @@ -0,0 +1,247 @@ +; +; SHA256 hash in x64 MASM +; +; Copyright (c) 2023 Chong Yeol Nah (MIT License) +; +; Permission is hereby granted, free of 
charge, to any person obtaining a copy of +; this software and associated documentation files (the "Software"), to deal in +; the Software without restriction, including without limitation the rights to +; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +; the Software, and to permit persons to whom the Software is furnished to do so, +; subject to the following conditions: +; - The above copyright notice and this permission notice shall be included in +; all copies or substantial portions of the Software. +; - The Software is provided "as is", without warranty of any kind, express or +; implied, including but not limited to the warranties of merchantability, +; fitness for a particular purpose and noninfringement. In no event shall the +; authors or copyright holders be liable for any claim, damages or other +; liability, whether in an action of contract, tort or otherwise, arising from, +; out of or in connection with the Software or the use or other dealings in the +; Software. 
+; +; +; Storage usage: +; Bytes Location Volatile Description +; 4 eax yes Temporary 32-bit word used in the hash computation +; 4 ebx no Temporary 32-bit word used in the hash computation +; 8 rcx yes Base address of message block array argument (register is never modified) +; 8 rdx yes Base address of hash value array argument (register is never modified; the array is updated in place) +; 4 edi no Temporary 32-bit word used in the hash computation +; 4 esi no Temporary 32-bit word used in the hash computation +; 8 rsp no x64 stack pointer +; 4 r8d yes SHA256 working variable A +; 4 r9d yes SHA256 working variable B +; 4 r10d yes SHA256 working variable C +; 4 r11d yes SHA256 working variable D +; 4 r12d no SHA256 working variable E +; 4 r13d no SHA256 working variable F +; 4 r14d no SHA256 working variable G +; 4 r15d no SHA256 working variable H +; 64 [rsp+0] no Circular buffer of most recent 16 message schedule items, 4 bytes each + + option casemap:none + + .const +SCHED macro i + index textequ %i AND 0fh ; i mod 16 + exitm <[rsp + index*4]> + endm + +ROUNDTAIL macro a, b, c, d, e, f, g, h, k ; on entry: ebx = w[i] + ; temp1 = h + S1 + ch + k[i] + w[i] + ; temp2 = S0 + maj + ; (obj1) h -> temp1 + temp2 = h + S1 + ch + k[i] + w[i] + S0 + maj + ; (obj2) d -> d + temp1 + ; Part 0 + mov eax, e + mov edi, e + mov esi, e + ror eax, 6 + ror edi, 11 + ror esi, 25 + xor edi, esi + xor eax, edi ; eax = S1 + ; ch = (e & f) ^ (~e & g) = (g ^ (e & (f ^ g))) + ; & and ^ form the Z/2Z ring (& is *, ^ is +) + ; ~e is (1 + e) + ; ef + (1 + e)g = ef + g + eg = g + ef + eg = g + e(f + g) + mov edi, g + xor edi, f + and edi, e + xor edi, g ; edi = ch + lea eax, [eax + edi + k] ; eax = S1 + ch + k[i] + add h, eax ; h -> h + S1 + ch + k[i] + add h, ebx ; h -> h + S1 + ch + k[i] + w[i] = temp1 + ; Part 1 + add d, h ; d -> d + temp1 (obj2) + ; Part 2 + mov eax, a + mov edi, a + mov esi, a + ror eax, 2 + ror edi, 13 + ror esi, 22 + xor edi, esi + xor eax, edi ; eax = S0 + add h, eax ; h -> temp1 + S0 + ; maj = (a and b) xor (a and c) xor (b and c) = (a and (b 
or c)) or (b and c) + ; https://www.wolframalpha.com/input?i=simplify+%28A+%26%26+B%29+xor+%28A+%26%26+C%29+xor+%28B+%26%26+C%29 + mov edi, c + mov eax, c + or eax, b + and edi, b + and eax, a + or eax, edi ; eax = maj + add h, eax ; h -> temp1 + S0 + maj = temp1 + temp2 (obj1) + endm + +ROUND macro i, a, b, c, d, e, f, g, h, k + +if i LT 16 + + mov ebx, [rcx + i*4] + bswap ebx + mov SCHED(i), ebx + +else + + ; (obj) w[i] -> w[i-16] + s0 + w[i-7] + s1 + mov ebx, SCHED(i - 16) ; ebx = w[i-16] + mov eax, SCHED(i - 15) + mov edi, eax + mov esi, eax + ror edi, 18 + shr esi, 3 + ror eax, 7 + xor edi, esi + xor eax, edi ; s0 = eax + add ebx, eax ; ebx = w[i-16] + s0 + add ebx, SCHED(i - 7) ; ebx = w[i-16] + s0 + w[i-7] + mov eax, SCHED(i - 2) + mov edi, eax + mov esi, eax + ror edi, 19 + shr esi, 10 + ror eax, 17 + xor edi, esi + xor eax, edi ; eax = s1 + add ebx, eax ; ebx = w[i-16] + s0 + w[i-7] + s1 + mov SCHED(i), ebx ; w[i] -> w[i-16] + s0 + w[i-7] + s1 (obj) + +endif + + ROUNDTAIL a, b, c, d, e, f, g, h, k ; ebx = w[i] + endm + + .code + ; void sha256_compress(const uint8_t block[64], uint32_t state[8]) -- rcx = block, rdx = state; NOTE(review): confirm the Rust extern declaration uses this argument order + public sha256_compress +sha256_compress proc + ; Save nonvolatile registers (rbx, rdi, rsi, r12-r15), allocate 64 bytes of stack for the message schedule buffer (NOTE(review): no unwind directives accompany this prologue -- confirm that is acceptable) + push rbx + push rdi + push rsi + push r12 + push r13 + push r14 + push r15 + sub rsp, 64 + + ; Load the eight state words from [rdx] into the working variables + mov r8d, [rdx] ; a + mov r9d, [rdx + 4] ; b + mov r10d, [rdx + 8] ; c + mov r11d, [rdx + 12] ; d + mov r12d, [rdx + 16] ; e + mov r13d, [rdx + 20] ; f + mov r14d, [rdx + 24] ; g + mov r15d, [rdx + 28] ; h + + ; 64 rounds of hashing (ROUND arguments: round index, a..h rotated one position per round, round constant k[i]) + ROUND 0, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 428A2F98h + ROUND 1, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 71374491h + ROUND 2, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -4A3F0431h + ROUND 3, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -164A245Bh + ROUND 4, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 3956C25Bh + ROUND 5, r11d, r12d, r13d, r14d, r15d, r8d 
, r9d , r10d, 59F111F1h + ROUND 6, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -6DC07D5Ch + ROUND 7, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -54E3A12Bh + ROUND 8, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -27F85568h + ROUND 9, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 12835B01h + ROUND 10, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 243185BEh + ROUND 11, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 550C7DC3h + ROUND 12, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 72BE5D74h + ROUND 13, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -7F214E02h + ROUND 14, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -6423F959h + ROUND 15, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -3E640E8Ch + ROUND 16, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -1B64963Fh + ROUND 17, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -1041B87Ah + ROUND 18, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0FC19DC6h + ROUND 19, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 240CA1CCh + ROUND 20, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 2DE92C6Fh + ROUND 21, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 4A7484AAh + ROUND 22, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 5CB0A9DCh + ROUND 23, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 76F988DAh + ROUND 24, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -67C1AEAEh + ROUND 25, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -57CE3993h + ROUND 26, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -4FFCD838h + ROUND 27, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -40A68039h + ROUND 28, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -391FF40Dh + ROUND 29, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -2A586EB9h + ROUND 30, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 06CA6351h + ROUND 31, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 14292967h + ROUND 32, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 27B70A85h + ROUND 33, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 2E1B2138h + 
ROUND 34, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 4D2C6DFCh + ROUND 35, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 53380D13h + ROUND 36, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 650A7354h + ROUND 37, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 766A0ABBh + ROUND 38, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -7E3D36D2h + ROUND 39, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -6D8DD37Bh + ROUND 40, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -5D40175Fh + ROUND 41, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -57E599B5h + ROUND 42, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -3DB47490h + ROUND 43, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -3893AE5Dh + ROUND 44, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -2E6D17E7h + ROUND 45, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -2966F9DCh + ROUND 46, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0BF1CA7Bh + ROUND 47, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 106AA070h + ROUND 48, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 19A4C116h + ROUND 49, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 1E376C08h + ROUND 50, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 2748774Ch + ROUND 51, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 34B0BCB5h + ROUND 52, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 391C0CB3h + ROUND 53, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 4ED8AA4Ah + ROUND 54, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 5B9CCA4Fh + ROUND 55, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 682E6FF3h + ROUND 56, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 748F82EEh + ROUND 57, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 78A5636Fh + ROUND 58, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -7B3787ECh + ROUND 59, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -7338FDF8h + ROUND 60, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -6F410006h + ROUND 61, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -5BAF9315h + ROUND 62, r10d, r11d, 
r12d, r13d, r14d, r15d, r8d , r9d , -41065C09h + ROUND 63, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -398E870Eh + + ; Add the working variables back into the hash state in place + add [rdx] , r8d + add [rdx + 4], r9d + add [rdx + 8], r10d + add [rdx + 12], r11d + add [rdx + 16], r12d + add [rdx + 20], r13d + add [rdx + 24], r14d + add [rdx + 28], r15d + + ; Release the schedule buffer and restore nonvolatile registers in reverse push order + add rsp, 64 + pop r15 + pop r14 + pop r13 + pop r12 + pop rsi + pop rdi + pop rbx + ret +sha256_compress endp + end diff --git a/sha2/src/sha512_x64_masm.asm b/sha2/src/sha512_x64_masm.asm new file mode 100644 index 0000000..18c8aee --- /dev/null +++ b/sha2/src/sha512_x64_masm.asm @@ -0,0 +1,265 @@ +; +; SHA512 hash in x64 MASM +; +; Copyright (c) 2023 Chong Yeol Nah (MIT License) +; +; Permission is hereby granted, free of charge, to any person obtaining a copy of +; this software and associated documentation files (the "Software"), to deal in +; the Software without restriction, including without limitation the rights to +; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +; the Software, and to permit persons to whom the Software is furnished to do so, +; subject to the following conditions: +; - The above copyright notice and this permission notice shall be included in +; all copies or substantial portions of the Software. +; - The Software is provided "as is", without warranty of any kind, express or +; implied, including but not limited to the warranties of merchantability, +; fitness for a particular purpose and noninfringement. In no event shall the +; authors or copyright holders be liable for any claim, damages or other +; liability, whether in an action of contract, tort or otherwise, arising from, +; out of or in connection with the Software or the use or other dealings in the +; Software. 
+; +; +; Storage usage: +; Bytes Location Volatile Description +; 8 rax yes Temporary w-bit word used in the hash computation +; 8 rbx no Temporary w-bit word used in the hash computation +; 8 rcx yes Base address of message block array argument (read-only) +; 8 rdx yes Base address of hash value array argument (read-only) +; 8 rdi no Temporary w-bit word used in the hash computation +; 8 rsi no Temporary w-bit word used in the hash computation +; 8 rsp no x64 stack pointer +; 8 r8 yes SHA512 working variable A +; 8 r9 yes SHA512 working variable B +; 8 r10 yes SHA512 working variable C +; 8 r11 yes SHA512 working variable D +; 8 r12 no SHA512 working variable E +; 8 r13 no SHA512 working variable F +; 8 r14 no SHA512 working variable G +; 8 r15 no SHA512 working variable H +; 128 [rsp+0] no Circular buffer of most recent 16 message schedule items, 8 bytes each + + option casemap:none + + .const +SCHED macro i + index textequ %i AND 0fh ; i mod 16 + exitm <[rsp + index*8]> + endm + +ROUNDTAIL macro a, b, c, d, e, f, g, h, k ; rbx = w[i] + ; temp1 = h + S1 + ch + k[i] + w[i] + ; temp2 = S0 + maj + ; (obj1) h -> temp1 + temp2 = h + S1 + ch + k[i] + w[i] + S0 + maj + ; (obj2) d -> d + temp1 + ; Part 0 + mov rax, e + mov rdi, e + mov rsi, e + ror rax, 14 + ror rdi, 18 + ror rsi, 41 + xor rdi, rsi + xor rax, rdi ; rax = S1 + ; ch = (e & f) ^ (~e & g) = (g ^ (e & (f ^ g))) + ; & and ^ form the Z/2Z ring (& is *, ^ is +) + ; ~e is (1 + e) + ; ef + (1 + e)g = ef + g + eg = g + ef + eg = g + e(f + g) + mov rdi, g + xor rdi, f + and rdi, e + xor rdi, g ; rdi = ch + add h, rax ; h -> h + S1 + add h, rdi ; h -> h + S1 + ch + mov rax, k + add h, rax ; h -> h + S1 + ch + k[i] + add h, rbx ; h -> h + S1 + ch + k[i] + w[i] = temp1 + ; Part 1 + add d, h ; d -> d + temp1 (obj2) + ; Part 2 + mov rax, a + mov rdi, a + mov rsi, a + ror rax, 28 + ror rdi, 34 + ror rsi, 39 + xor rdi, rsi + xor rax, rdi ; rax = S0 + add h, rax ; h -> temp1 + S0 + ; maj = (a and b) xor (a and c) xor (b and 
c) = (a and (b or c)) or (b and c)
+    ; https://www.wolframalpha.com/input?i=simplify+%28A+%26%26+B%29+xor+%28A+%26%26+C%29+xor+%28B+%26%26+C%29
+    mov rdi, c
+    mov rax, c
+    or rax, b
+    and rdi, b
+    and rax, a
+    or rax, rdi ; rax = maj
+    add h, rax ; h -> temp1 + S0 + maj = temp1 + temp2 (obj1)
+    endm
+; ROUND i, a..h, k: one round. Leaves schedule word w[i] in rbx and in SCHED(i), then expands ROUNDTAIL. Overwrites rax, rbx, rdi (and rsi when i is 16 or more).
+ROUND macro i, a, b, c, d, e, f, g, h, k
+    ; Rounds 0-15: w[i] is the i-th 8-byte word of the block addressed by rcx, byte-reversed with bswap.
+if i LT 16
+
+    mov rbx, [rcx + i*8]
+    bswap rbx
+    mov SCHED(i), rbx
+
+else
+    ; Rounds 16 and up: w[i] is built from four earlier schedule words. SCHED is defined outside this excerpt; presumably it addresses a slot in the stack scratch area - TODO confirm.
+    ; (obj) w[i] -> w[i-16] + s0 + w[i-7] + s1
+    mov rbx, SCHED(i - 16) ; rbx = w[i-16]
+    mov rax, SCHED(i - 15) ; rax = w[i-15], the input of s0
+    mov rdi, rax
+    mov rsi, rax
+    ror rdi, 8
+    shr rsi, 7
+    ror rax, 1
+    xor rdi, rsi
+    xor rax, rdi ; rax = s0 = (w[i-15] ror 1) xor (w[i-15] ror 8) xor (w[i-15] shr 7)
+    add rbx, rax ; rbx = w[i-16] + s0
+    add rbx, SCHED(i - 7) ; rbx = w[i-16] + s0 + w[i-7]
+    mov rax, SCHED(i - 2) ; rax = w[i-2], the input of s1
+    mov rdi, rax
+    mov rsi, rax
+    ror rdi, 61
+    shr rsi, 6
+    ror rax, 19
+    xor rdi, rsi
+    xor rax, rdi ; rax = s1 = (w[i-2] ror 19) xor (w[i-2] ror 61) xor (w[i-2] shr 6)
+    add rbx, rax ; rbx = w[i-16] + s0 + w[i-7] + s1
+    mov SCHED(i), rbx ; w[i] -> w[i-16] + s0 + w[i-7] + s1 (obj)
+
+endif
+
+    ROUNDTAIL a, b, c, d, e, f, g, h, k ; rbx = w[i]
+    endm
+; sha512_compress: expands ROUND 80 times over one block (read through rcx) and adds the resulting a..h into the eight 64-bit state words at rdx, in place.
+    .code
+    ; void sha512_compress(const uint8_t block[128], uint64_t state[8])
+    public sha512_compress
+sha512_compress proc ; NOTE(review): plain proc with no FRAME / .pushreg / .allocstack unwind directives although rsp and nonvolatile registers change - confirm this is intended
+    ; Save the nonvolatile registers overwritten below (rbx, rdi, rsi, r12-r15), then allocate scratch space
+    push rbx
+    push rdi
+    push rsi
+    push r12
+    push r13
+    push r14
+    push r15
+    sub rsp, 128 ; 128 bytes = 16 qword slots; presumably the storage SCHED addresses - TODO confirm against its definition
+
+    ; Initialize working variables with previous hash value
+    mov r8, [rdx] ; a
+    mov r9, [rdx + 8] ; b
+    mov r10, [rdx + 16] ; c
+    mov r11, [rdx + 24] ; d
+    mov r12, [rdx + 32] ; e
+    mov r13, [rdx + 40] ; f
+    mov r14, [rdx + 48] ; g
+    mov r15, [rdx + 56] ; h
+
+    ; 80 rounds of hashing. Operands: round index, the registers acting as a..h (the list rotates one place per round, so after 80 rounds r8..r15 are a..h again), and the round constant k
+    ROUND 0, r8, r9, r10, r11, r12, r13, r14, r15, 0428A2F98D728AE22h ; NOTE(review): k values look like the FIPS 180-4 SHA-512 constants - re-check against the spec if edited
+    ROUND 1, r15, r8, r9, r10, r11, r12, r13, r14, 07137449123EF65CDh
+    ROUND 2, r14, r15, r8, r9, r10, r11, r12, r13, 0B5C0FBCFEC4D3B2Fh
+    ROUND 3, r13, r14, r15, r8, r9, r10, r11, r12, 0E9B5DBA58189DBBCh
+    ROUND 4, r12, r13, r14, r15, r8, r9, r10, r11, 03956C25BF348B538h
+    ROUND 5, r11, r12, r13, r14, r15, r8, r9, r10, 059F111F1B605D019h
+    ROUND 6, r10, r11, r12, r13, r14, r15, r8, r9, 0923F82A4AF194F9Bh
+    ROUND 7, r9, r10, r11, r12, r13, r14, r15, r8, 0AB1C5ED5DA6D8118h
+    ROUND 8, r8, r9, r10, r11, r12, r13, r14, r15, 0D807AA98A3030242h
+    ROUND 9, r15, r8, r9, r10, r11, r12, r13, r14, 012835B0145706FBEh
+    ROUND 10, r14, r15, r8, r9, r10, r11, r12, r13, 0243185BE4EE4B28Ch
+    ROUND 11, r13, r14, r15, r8, r9, r10, r11, r12, 0550C7DC3D5FFB4E2h
+    ROUND 12, r12, r13, r14, r15, r8, r9, r10, r11, 072BE5D74F27B896Fh
+    ROUND 13, r11, r12, r13, r14, r15, r8, r9, r10, 080DEB1FE3B1696B1h
+    ROUND 14, r10, r11, r12, r13, r14, r15, r8, r9, 09BDC06A725C71235h
+    ROUND 15, r9, r10, r11, r12, r13, r14, r15, r8, 0C19BF174CF692694h
+    ROUND 16, r8, r9, r10, r11, r12, r13, r14, r15, 0E49B69C19EF14AD2h
+    ROUND 17, r15, r8, r9, r10, r11, r12, r13, r14, 0EFBE4786384F25E3h
+    ROUND 18, r14, r15, r8, r9, r10, r11, r12, r13, 00FC19DC68B8CD5B5h
+    ROUND 19, r13, r14, r15, r8, r9, r10, r11, r12, 0240CA1CC77AC9C65h
+    ROUND 20, r12, r13, r14, r15, r8, r9, r10, r11, 02DE92C6F592B0275h
+    ROUND 21, r11, r12, r13, r14, r15, r8, r9, r10, 04A7484AA6EA6E483h
+    ROUND 22, r10, r11, r12, r13, r14, r15, r8, r9, 05CB0A9DCBD41FBD4h
+    ROUND 23, r9, r10, r11, r12, r13, r14, r15, r8, 076F988DA831153B5h
+    ROUND 24, r8, r9, r10, r11, r12, r13, r14, r15, 0983E5152EE66DFABh
+    ROUND 25, r15, r8, r9, r10, r11, r12, r13, r14, 0A831C66D2DB43210h
+    ROUND 26, r14, r15, r8, r9, r10, r11, r12, r13, 0B00327C898FB213Fh
+    ROUND 27, r13, r14, r15, r8, r9, r10, r11, r12, 0BF597FC7BEEF0EE4h
+    ROUND 28, r12, r13, r14, r15, r8, r9, r10, r11, 0C6E00BF33DA88FC2h
+    ROUND 29, r11, r12, r13, r14, r15, r8, r9, r10, 0D5A79147930AA725h
+    ROUND 30, r10, r11, r12, r13, r14, r15, r8, r9, 006CA6351E003826Fh
+    ROUND 31, r9, r10, r11, r12, r13, r14, r15, r8, 0142929670A0E6E70h
+    ROUND 32, r8, r9, r10, r11, r12, r13, r14, r15, 027B70A8546D22FFCh
+    ROUND 33, r15, r8, r9, r10, r11, r12, r13, r14, 02E1B21385C26C926h
+    ROUND 34, r14, r15, r8, r9, r10, r11, r12, r13, 04D2C6DFC5AC42AEDh
+    ROUND 35, r13, r14, r15, r8, r9, r10, r11, r12, 053380D139D95B3DFh
+    ROUND 36, r12, r13, r14, r15, r8, r9, r10, r11, 0650A73548BAF63DEh
+    ROUND 37, r11, r12, r13, r14, r15, r8, r9, r10, 0766A0ABB3C77B2A8h
+    ROUND 38, r10, r11, r12, r13, r14, r15, r8, r9, 081C2C92E47EDAEE6h
+    ROUND 39, r9, r10, r11, r12, r13, r14, r15, r8, 092722C851482353Bh
+    ROUND 40, r8, r9, r10, r11, r12, r13, r14, r15, 0A2BFE8A14CF10364h
+    ROUND 41, r15, r8, r9, r10, r11, r12, r13, r14, 0A81A664BBC423001h
+    ROUND 42, r14, r15, r8, r9, r10, r11, r12, r13, 0C24B8B70D0F89791h
+    ROUND 43, r13, r14, r15, r8, r9, r10, r11, r12, 0C76C51A30654BE30h
+    ROUND 44, r12, r13, r14, r15, r8, r9, r10, r11, 0D192E819D6EF5218h
+    ROUND 45, r11, r12, r13, r14, r15, r8, r9, r10, 0D69906245565A910h
+    ROUND 46, r10, r11, r12, r13, r14, r15, r8, r9, 0F40E35855771202Ah
+    ROUND 47, r9, r10, r11, r12, r13, r14, r15, r8, 0106AA07032BBD1B8h
+    ROUND 48, r8, r9, r10, r11, r12, r13, r14, r15, 019A4C116B8D2D0C8h
+    ROUND 49, r15, r8, r9, r10, r11, r12, r13, r14, 01E376C085141AB53h
+    ROUND 50, r14, r15, r8, r9, r10, r11, r12, r13, 02748774CDF8EEB99h
+    ROUND 51, r13, r14, r15, r8, r9, r10, r11, r12, 034B0BCB5E19B48A8h
+    ROUND 52, r12, r13, r14, r15, r8, r9, r10, r11, 0391C0CB3C5C95A63h
+    ROUND 53, r11, r12, r13, r14, r15, r8, r9, r10, 04ED8AA4AE3418ACBh
+    ROUND 54, r10, r11, r12, r13, r14, r15, r8, r9, 05B9CCA4F7763E373h
+    ROUND 55, r9, r10, r11, r12, r13, r14, r15, r8, 0682E6FF3D6B2B8A3h
+    ROUND 56, r8, r9, r10, r11, r12, r13, r14, r15, 0748F82EE5DEFB2FCh
+    ROUND 57, r15, r8, r9, r10, r11, r12, r13, r14, 078A5636F43172F60h
+    ROUND 58, r14, r15, r8, r9, r10, r11, r12, r13, 084C87814A1F0AB72h
+    ROUND 59, r13, r14, r15, r8, r9, r10, r11, r12, 08CC702081A6439ECh
+    ROUND 60, r12, r13, r14, r15, r8, r9, r10, r11, 090BEFFFA23631E28h
+    ROUND 61, r11, r12, r13, r14, r15, r8, r9, r10, 0A4506CEBDE82BDE9h
+    ROUND 62, r10, r11, r12, r13, r14, r15, r8, r9, 0BEF9A3F7B2C67915h
+    ROUND 63, r9, r10, r11, r12, r13, r14, r15, r8, 0C67178F2E372532Bh
+    ROUND 64, r8, r9, r10, r11, r12, r13, r14, r15, 0CA273ECEEA26619Ch
+    ROUND 65, r15, r8, r9, r10, r11, r12, r13, r14, 0D186B8C721C0C207h
+    ROUND 66, r14, r15, r8, r9, r10, r11, r12, r13, 0EADA7DD6CDE0EB1Eh
+    ROUND 67, r13, r14, r15, r8, r9, r10, r11, r12, 0F57D4F7FEE6ED178h
+    ROUND 68, r12, r13, r14, r15, r8, r9, r10, r11, 006F067AA72176FBAh
+    ROUND 69, r11, r12, r13, r14, r15, r8, r9, r10, 00A637DC5A2C898A6h
+    ROUND 70, r10, r11, r12, r13, r14, r15, r8, r9, 0113F9804BEF90DAEh
+    ROUND 71, r9, r10, r11, r12, r13, r14, r15, r8, 01B710B35131C471Bh
+    ROUND 72, r8, r9, r10, r11, r12, r13, r14, r15, 028DB77F523047D84h
+    ROUND 73, r15, r8, r9, r10, r11, r12, r13, r14, 032CAAB7B40C72493h
+    ROUND 74, r14, r15, r8, r9, r10, r11, r12, r13, 03C9EBE0A15C9BEBCh
+    ROUND 75, r13, r14, r15, r8, r9, r10, r11, r12, 0431D67C49C100D4Ch
+    ROUND 76, r12, r13, r14, r15, r8, r9, r10, r11, 04CC5D4BECB3E42B6h
+    ROUND 77, r11, r12, r13, r14, r15, r8, r9, r10, 0597F299CFC657E2Ah
+    ROUND 78, r10, r11, r12, r13, r14, r15, r8, r9, 05FCB6FAB3AD6FAECh
+    ROUND 79, r9, r10, r11, r12, r13, r14, r15, r8, 06C44198C4A475817h
+
+    ; Compute intermediate hash value: add the working variables a..h into the eight state words in memory
+    add [rdx] , r8
+    add [rdx + 8], r9
+    add [rdx + 16], r10
+    add [rdx + 24], r11
+    add [rdx + 32], r12
+    add [rdx + 40], r13
+    add [rdx + 48], r14
+    add [rdx + 56], r15
+
+    ; Release the scratch space, then restore nonvolatile registers in the reverse of the push order
+    add rsp, 128
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop rsi
+    pop rdi
+    pop rbx
+    ret
+sha512_compress endp
+    end
diff --git a/whirlpool/build.rs b/whirlpool/build.rs
index 88d3542..1d5d80c 100644
--- a/whirlpool/build.rs
+++ b/whirlpool/build.rs
@@ -1,10 +1,13 @@
 fn main() {
     let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
+    let target_family = std::env::var("CARGO_CFG_TARGET_FAMILY").unwrap_or_default();
 
     let asm_path = if target_arch == "x86" {
         "src/x86.S"
-    } else if target_arch == "x86_64" {
+    } else if target_arch == "x86_64" && target_family == "unix" {
         "src/x64.S"
+    } else if target_arch == "x86_64" && target_family == "windows" {
+        "src/x64_masm.asm"
     } else {
         panic!("Unsupported target architecture");
     };