Skip to content

Commit

Permalink
Merge pull request #1645 from Gorachya/master
Browse files Browse the repository at this point in the history
SM9算法优化
  • Loading branch information
guanzhi authored Mar 20, 2024
2 parents 31440f9 + ce7f9a2 commit 4a7c65e
Show file tree
Hide file tree
Showing 15 changed files with 10,742 additions and 90 deletions.
21 changes: 16 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,13 @@ set(src
src/sm2_z256_sign.c
src/sm2_lib.c
src/sm2_ctx.c
src/sm9_alg.c
src/sm9_key.c
src/sm9_lib.c
#src/sm9_alg.c
#src/sm9_key.c
#src/sm9_lib.c
src/sm9_z256_alg.c
src/sm9_z256_key.c
src/sm9_z256_lib.c
src/sm9_z256_table.c
src/zuc.c
src/zuc_modes.c
src/hash_drbg.c
Expand Down Expand Up @@ -127,7 +131,8 @@ set(tests
sm2_z256
sm2_sign
sm2_enc
sm9
#sm9
sm9_z256
zuc
hash_drbg
block_cipher
Expand Down Expand Up @@ -248,6 +253,13 @@ if (ENABLE_SM2_Z256_ARMV8)
list(APPEND src src/sm2_z256_armv8.S)
endif()

# Optional ARMv8 assembly sources for SM9_Z256 (OFF by default).
# When ON: passes -DENABLE_SM9_Z256_ARMV8 to the compiler, enables the ASM
# language, and appends src/sm9_z256_armv8.S to the `src` list.
option(ENABLE_SM9_Z256_ARMV8 "Enable SM9_Z256 ARMv8 assembly" OFF)
if (ENABLE_SM9_Z256_ARMV8)
message(STATUS "ENABLE_SM9_Z256_ARMV8 is ON")
add_definitions(-DENABLE_SM9_Z256_ARMV8)
enable_language(ASM)
list(APPEND src src/sm9_z256_armv8.S)
endif()

option(ENABLE_SM2_PRIVATE_KEY_EXPORT "Enable export un-encrypted SM2 private key" OFF)
if (ENABLE_SM2_PRIVATE_KEY_EXPORT)
Expand Down Expand Up @@ -570,4 +582,3 @@ string(CONCAT CPACK_PACKAGE_DESCRIPTION_SUMMARY
set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE")
set(CPACK_NSIS_MODIFY_PATH ON)
include(CPack)

5 changes: 5 additions & 0 deletions include/gmssl/sm9.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ void sm9_print_bn(const char *prefix, const sm9_bn_t a); // 标准打印格式


typedef sm9_bn_t sm9_fp_t;
extern const sm9_fp_t SM9_P;
extern const sm9_fp_t SM9_N;

// Initialise an Fp element by setting it to zero.
#define sm9_fp_init(r) sm9_fp_set_zero(r)
// Wipe an Fp element by setting it to zero. The expansion must use the macro's
// own parameter; referring to `r` here would zero whatever variable named `r`
// happens to be in scope at the call site (or fail to compile if none exists).
#define sm9_fp_clean(f) sm9_fp_set_zero(f)
Expand Down Expand Up @@ -79,6 +81,9 @@ void sm9_fp_div2(sm9_fp_t r, const sm9_fp_t a);
int sm9_fp_from_bytes(sm9_fp_t r, const uint8_t buf[32]);
int sm9_fp_from_hex(sm9_fp_t r, const char hex[64]);

// r = Montgomery product of a and 2^512 mod p, i.e. a * 2^256 mod p.
void sm9_fp_to_mont(sm9_fp_t r, const sm9_fp_t a);
// r = Montgomery product of a and SM9_ONE (leaves Montgomery form if SM9_ONE is 1 -- confirm).
void sm9_fp_from_mont(sm9_fp_t r, const sm9_fp_t a);
// r = a * b * 2^-256 mod p (Montgomery multiplication with R = 2^256).
void sm9_fp_mul_mont(sm9_fp_t r, const sm9_fp_t a, const sm9_fp_t b);

typedef sm9_bn_t sm9_fn_t;

Expand Down
488 changes: 488 additions & 0 deletions include/gmssl/sm9_z256.h

Large diffs are not rendered by default.

252 changes: 173 additions & 79 deletions src/sm9_alg.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,6 @@ int sm9_bn_cmp(const sm9_bn_t a, const sm9_bn_t b)
return 0;
}




void sm9_bn_copy(sm9_bn_t r, const sm9_bn_t a)
{
memcpy(r, a, sizeof(sm9_bn_t));
Expand Down Expand Up @@ -328,6 +325,87 @@ void sm9_barrett_bn_sub(sm9_barrett_bn_t ret, const sm9_barrett_bn_t a, const sm
}
}

// Constants for Montgomery multiplication modulo p with R = 2^256.
// Each array stores the value least-significant limb first, 32 bits per element.
// SM9_W         = -p^-1 mod 2^256 = 0xafd2bac5558a13b3966a4b291522b137181ae39613c8dbaf892bc42c2f2ee42b
// SM9_2e512modp = 2^512 mod p     = 0x2ea795a656f62fbde479b522d6706e7b88f8105fae1a5d3f27dea312b417e2d2
const sm9_bn_t SM9_W = {0x2f2ee42b, 0x892bc42c, 0x13c8dbaf, 0x181ae396, 0x1522b137, 0x966a4b29, 0x558a13b3, 0xafd2bac5};
const sm9_bn_t SM9_2e512modp = {0xb417e2d2, 0x27dea312, 0xae1a5d3f, 0x88f8105f, 0xd6706e7b, 0xe479b522, 0x56f62fbd, 0x2ea795a6};

// Convert a into Montgomery form: r = a * 2^256 mod p.
// Implemented as a Montgomery multiplication by 2^512 mod p, whose built-in
// division by 2^256 leaves the single factor 2^256.
void sm9_fp_to_mont(sm9_fp_t r, const sm9_fp_t a)
{
sm9_fp_mul_mont(r, a, SM9_2e512modp);
}

// Montgomery-multiply a by SM9_ONE and store the result in r.
// NOTE(review): SM9_ONE is defined elsewhere; assuming it is the integer 1,
// this computes a * 2^-256 mod p, i.e. converts a out of Montgomery form.
void sm9_fp_from_mont(sm9_fp_t r, const sm9_fp_t a)
{
sm9_fp_mul_mont(r, a, SM9_ONE);
}

// Full-width schoolbook product of two 8-limb numbers.
//   r1 receives the low 8 limbs of a * b, r2 the high 8 limbs.
// The product is accumulated in a private buffer, so r1/r2 may alias a or b.
// Each limb is expected to carry 32 bits of value, so that
// limb*limb + limb + carry cannot overflow the 64-bit accumulator.
void sm9_bn_mul(sm9_bn_t r1, sm9_bn_t r2, const sm9_bn_t a, const sm9_bn_t b)
{
	uint64_t prod[16] = {0};
	uint64_t carry;
	int row, col;

	for (row = 0; row < 8; row++) {
		carry = 0;
		for (col = 0; col < 8; col++) {
			uint64_t *cell = &prod[row + col];

			// partial product plus what is already in this column
			carry += *cell + a[row] * b[col];
			*cell = carry & 0xffffffff;
			carry >>= 32;
		}
		// the next column has not been written by this row yet
		prod[row + 8] = carry;
	}

	for (row = 0; row < 8; row++) {
		r1[row] = prod[row];      // low 256 bits
		r2[row] = prod[row + 8];  // high 256 bits
	}
}

// 512-bit addition on (low, high) pairs of 8-limb numbers:
//   (r2:r1) = (a2:a1) + (b2:b1)
// Limbs hold 32 bits of value each; carries are first accumulated in the
// upper bits of every limb and stripped at the end.
// The top limb r2[7] is intentionally NOT masked, so a carry out of bit 511
// remains visible to the caller in the upper bits of r2[7].
// r1 may alias a1 or b1 (and r2 may alias a2 or b2): every limb is read
// before it is overwritten.
void sm9_bn_add_512(sm9_bn_t r1, sm9_bn_t r2,
	const sm9_bn_t a1, const sm9_bn_t a2,
	const sm9_bn_t b1, const sm9_bn_t b2)
{
	int i;

	// low half
	r1[0] = a1[0] + b1[0];
	for (i = 1; i < 8; i++) {
		r1[i] = a1[i] + b1[i] + (r1[i-1] >> 32);
	}

	// high half continues the carry chain from r1[7]
	r2[0] = a2[0] + b2[0] + (r1[7] >> 32);
	for (i = 1; i < 8; i++) {
		r2[i] = a2[i] + b2[i] + (r2[i-1] >> 32);
	}

	// drop the propagated carries; r2[7] keeps its overflow bit (see above)
	for (i = 0; i < 7; i++) {
		r1[i] &= 0xffffffff;
		r2[i] &= 0xffffffff;
	}
	r1[7] &= 0xffffffff;
}

// Montgomery multiplication modulo p with R = 2^256:
//   r = a * b * 2^-256 mod p
// r may alias a or b, because both inputs are consumed by the first
// multiplication before r is written.
// NOTE(review): the final single conditional subtraction assumes a, b < p so
// that the intermediate result is below 2p -- confirm at the call sites.
void sm9_fp_mul_mont(sm9_fp_t r, const sm9_fp_t a, const sm9_fp_t b)
{
	sm9_bn_t z_lo, z_hi, u_lo, u_hi;

	// z = a * b, full 512-bit product (z_hi:z_lo)
	sm9_bn_mul(z_lo, z_hi, a, b);

	// u = (z mod 2^256) * w mod 2^256; only the low half u_lo is kept
	sm9_bn_mul(u_lo, u_hi, z_lo, SM9_W);

	// (u_hi:u_lo) = u * p
	sm9_bn_mul(u_lo, u_hi, u_lo, SM9_P);

	// z + u * p is divisible by 2^256: the low half (written to z_lo) is
	// discarded and the high half is the result. sm9_bn_add_512 leaves a carry
	// out of bit 511 in the upper bits of r[7].
	// NOTE(review): relies on sm9_bn_cmp/sm9_bn_sub coping with that extra bit.
	sm9_bn_add_512(z_lo, r, z_lo, z_hi, u_lo, u_hi);

	if (sm9_bn_cmp(r, SM9_P) >= 0) {
		sm9_bn_sub(r, r, SM9_P);
	}
}

void sm9_fp_mul(sm9_fp_t r, const sm9_fp_t a, const sm9_fp_t b)
{
uint64_t s[18];
Expand Down Expand Up @@ -583,21 +661,36 @@ void sm9_fp2_neg(sm9_fp2_t r, const sm9_fp2_t a)
sm9_fp_neg(r[1], a[1]);
}

// r = (-2 * a[1], a[0]): multiplies a = a[0] + a[1]*u by u, taking u^2 = -2.
// Safe when r and a are the same object: a[1] is consumed into the temporary
// r0 and a[0] is moved to r[1] before r[0] is overwritten.
void sm9_fp2_a_mul_u(sm9_fp2_t r, sm9_fp2_t a) {
sm9_fp_t r0;

// r0 = -2 * a[1]
sm9_fp_dbl(r0, a[1]);
sm9_fp_neg(r0, r0);

// r[1] must be written before r[0] so that a[0] is still intact when r == a
sm9_fp_copy(r[1], a[0]);
sm9_fp_copy(r[0], r0);
}

// r = a * b for a = a0 + a1*u and b = b0 + b1*u, with u^2 = -2:
//   r[0] = a0*b0 - 2*a1*b1
//   r[1] = a0*b1 + a1*b0
// The cross term is obtained Karatsuba-style from (a0 + a1)*(b0 + b1), so only
// three field multiplications are needed; it is not recomputed afterwards.
// Results are built in temporaries, so r may alias a or b.
void sm9_fp2_mul(sm9_fp2_t r, const sm9_fp2_t a, const sm9_fp2_t b)
{
	sm9_fp_t r0, r1, t;

	// r1 = (a0 + a1) * (b0 + b1)
	sm9_fp_add(r0, a[0], a[1]);
	sm9_fp_add(t, b[0], b[1]);
	sm9_fp_mul(r1, t, r0);

	// r0 = a0 * b0, t = a1 * b1
	sm9_fp_mul(r0, a[0], b[0]);
	sm9_fp_mul(t, a[1], b[1]);

	// r1 = (a0 + a1) * (b0 + b1) - a0 * b0 - a1 * b1 = a0 * b1 + a1 * b0
	sm9_fp_sub(r1, r1, r0);
	sm9_fp_sub(r1, r1, t);

	// r0 = a0 * b0 - 2 * a1 * b1
	sm9_fp_dbl(t, t);
	sm9_fp_sub(r0, r0, t);

	sm9_fp_copy(r[0], r0);
	sm9_fp_copy(r[1], r1);
}
Expand Down Expand Up @@ -631,16 +724,17 @@ void sm9_fp2_mul_fp(sm9_fp2_t r, const sm9_fp2_t a, const sm9_fp_t k)

void sm9_fp2_sqr(sm9_fp2_t r, const sm9_fp2_t a)
{
sm9_fp_t r0, r1, t;

// a0^2 - 2 * a1^2
sm9_fp_sqr(r0, a[0]);
sm9_fp_sqr(t, a[1]);
sm9_fp_dbl(t, t);
sm9_fp_sub(r0, r0, t);
sm9_fp_t r0, r1, c0, c1;

// r0 = (a0 + a1) * (a0 - 2a1) + a0 * a1
sm9_fp_mul(r1, a[0], a[1]);
sm9_fp_add(c0, a[0], a[1]);
sm9_fp_dbl(c1, a[1]);
sm9_fp_sub(c1, a[0], c1);
sm9_fp_mul(r0, c0, c1);
sm9_fp_add(r0, r0, r1);

// r1 = 2 * a0 * a1
sm9_fp_mul(r1, a[0], a[1]);
sm9_fp_dbl(r1, r1);

sm9_bn_copy(r[0], r0);
Expand Down Expand Up @@ -835,18 +929,38 @@ void sm9_fp4_neg(sm9_fp4_t r, const sm9_fp4_t a)
sm9_fp2_neg(r[1], a[1]);
}

// r = a / 2, computed by halving each of the two Fp2 components with sm9_fp2_div2.
void sm9_fp4_div2(sm9_fp4_t r, const sm9_fp4_t a)
{
sm9_fp2_div2(r[0], a[0]);
sm9_fp2_div2(r[1], a[1]);
}

// r = (a[1] * u, a[0]): multiplies a = a[0] + a[1]*v by v, taking v^2 = u.
// Safe when r and a are the same object: a[1]*u goes into the temporary r0 and
// a[0] is moved to r[1] before r[0] is overwritten.
void sm9_fp4_a_mul_v(sm9_fp4_t r, sm9_fp4_t a) {
sm9_fp2_t r0;

// r0 = a[1] * u
sm9_fp2_a_mul_u(r0, a[1]);

// r[1] must be written before r[0] so that a[0] is still intact when r == a
sm9_fp2_copy(r[1], a[0]);
sm9_fp2_copy(r[0], r0);
}

// r = a * b for a = a0 + a1*v and b = b0 + b1*v, with v^2 = u:
//   r[0] = a0*b0 + (a1*b1)*u
//   r[1] = a0*b1 + a1*b0
// The cross term is obtained Karatsuba-style from (a0 + a1)*(b0 + b1), so only
// three Fp2 multiplications are needed; no value is computed and then thrown
// away. Results are built in temporaries, so r may alias a or b.
void sm9_fp4_mul(sm9_fp4_t r, const sm9_fp4_t a, const sm9_fp4_t b)
{
	sm9_fp2_t r0, r1, t;

	// r1 = (a0 + a1) * (b0 + b1)
	sm9_fp2_add(r0, a[0], a[1]);
	sm9_fp2_add(t, b[0], b[1]);
	sm9_fp2_mul(r1, t, r0);

	// r0 = a0 * b0, t = a1 * b1
	sm9_fp2_mul(r0, a[0], b[0]);
	sm9_fp2_mul(t, a[1], b[1]);

	// r1 = (a0 + a1) * (b0 + b1) - a0 * b0 - a1 * b1 = a0 * b1 + a1 * b0
	sm9_fp2_sub(r1, r1, r0);
	sm9_fp2_sub(r1, r1, t);

	// r0 = a0 * b0 + (a1 * b1) * u
	sm9_fp2_a_mul_u(t, t);
	sm9_fp2_add(r0, r0, t);

	sm9_fp2_copy(r[0], r0);
	sm9_fp2_copy(r[1], r1);
}
Expand Down Expand Up @@ -883,12 +997,18 @@ void sm9_fp4_sqr(sm9_fp4_t r, const sm9_fp4_t a)
{
sm9_fp2_t r0, r1, t;

sm9_fp2_add(r1, a[0], a[1]);
sm9_fp2_sqr(r1, r1);

sm9_fp2_sqr(r0, a[0]);
sm9_fp2_sqr_u(t, a[1]);
sm9_fp2_sqr(t, a[1]);

sm9_fp2_sub(r1, r1, r0);
sm9_fp2_sub(r1, r1, t);

sm9_fp2_a_mul_u(t, t);
sm9_fp2_add(r0, r0, t);

sm9_fp2_mul(r1, a[0], a[1]);
sm9_fp2_dbl(r1, r1);

sm9_fp2_copy(r[0], r0);
sm9_fp2_copy(r[1], r1);
}
Expand Down Expand Up @@ -1101,25 +1221,34 @@ void sm9_fp12_neg(sm9_fp12_t r, const sm9_fp12_t a)

void sm9_fp12_mul(sm9_fp12_t r, const sm9_fp12_t a, const sm9_fp12_t b)
{
sm9_fp4_t r0, r1, r2, t;

sm9_fp4_mul(r0, a[0], b[0]);
sm9_fp4_mul_v(t, a[1], b[2]);
sm9_fp4_add(r0, r0, t);
sm9_fp4_mul_v(t, a[2], b[1]);
sm9_fp4_add(r0, r0, t);

sm9_fp4_mul(r1, a[0], b[1]);
sm9_fp4_mul(t, a[1], b[0]);
sm9_fp4_add(r1, r1, t);
sm9_fp4_mul_v(t, a[2], b[2]);
sm9_fp4_add(r1, r1, t);
sm9_fp4_t r0, r1, r2, t, k0, k1, m0, m1, m2;

sm9_fp4_mul(r2, a[0], b[2]);
sm9_fp4_mul(t, a[1], b[1]);
sm9_fp4_add(r2, r2, t);
sm9_fp4_mul(t, a[2], b[0]);
sm9_fp4_add(r2, r2, t);
sm9_fp4_mul(m0, a[0], b[0]);
sm9_fp4_mul(m1, a[1], b[1]);
sm9_fp4_mul(m2, a[2], b[2]);

sm9_fp4_add(k0, a[1], a[2]);
sm9_fp4_add(k1, b[1], b[2]);
sm9_fp4_mul(t, k0, k1);
sm9_fp4_sub(t, t, m1);
sm9_fp4_sub(t, t, m2);
sm9_fp4_a_mul_v(t, t);
sm9_fp4_add(r0, t, m0);

sm9_fp4_add(k0, a[0], a[2]);
sm9_fp4_add(k1, b[0], b[2]);
sm9_fp4_mul(t, k0, k1);
sm9_fp4_sub(t, t, m0);
sm9_fp4_sub(t, t, m2);
sm9_fp4_add(r2, t, m1);

sm9_fp4_add(k0, a[0], a[1]);
sm9_fp4_add(k1, b[0], b[1]);
sm9_fp4_mul(t, k0, k1);
sm9_fp4_sub(t, t, m0);
sm9_fp4_sub(t, t, m1);
sm9_fp4_a_mul_v(m2, m2);
sm9_fp4_add(r1, t, m2);

sm9_fp4_copy(r[0], r0);
sm9_fp4_copy(r[1], r1);
Expand Down Expand Up @@ -1150,41 +1279,6 @@ void sm9_fp12_mul(sm9_fp12_t r, const sm9_fp12_t a, const sm9_fp12_t b)
// sm9_fp4_copy(r[2], r2);
// }

// r = a / 2, computed by halving each of the two Fp2 components with sm9_fp2_div2.
void sm9_fp4_div2(sm9_fp4_t r, const sm9_fp4_t a)
{
sm9_fp2_div2(r[0], a[0]);
sm9_fp2_div2(r[1], a[1]);
}

// r = (-2 * a[1], a[0]): multiplies a = a[0] + a[1]*u by u, taking u^2 = -2.
// Both inputs are captured in temporaries before r is written, so r and a may
// be the same object.
void sm9_fp2_a_mul_u(sm9_fp2_t r, sm9_fp2_t a) {
	sm9_fp_t saved_a0, neg_twice_a1;

	// capture a[0] and derive -2 * a[1] before touching r
	sm9_fp_copy(saved_a0, a[0]);
	sm9_fp_dbl(neg_twice_a1, a[1]);
	sm9_fp_neg(neg_twice_a1, neg_twice_a1);

	sm9_fp_copy(r[0], neg_twice_a1);
	sm9_fp_copy(r[1], saved_a0);
}

// r = (a[1] * u, a[0]): multiplies a = a[0] + a[1]*v by v, taking v^2 = u.
// Both inputs are captured in temporaries before r is written, so r and a may
// be the same object.
void sm9_fp4_a_mul_v(sm9_fp4_t r, sm9_fp4_t a) {
	sm9_fp2_t saved_a0, a1_times_u;

	// capture a[0] and derive a[1] * u before touching r
	sm9_fp2_copy(saved_a0, a[0]);
	sm9_fp2_a_mul_u(a1_times_u, a[1]);

	sm9_fp2_copy(r[0], a1_times_u);
	sm9_fp2_copy(r[1], saved_a0);
}

void sm9_fp12_sqr(sm9_fp12_t r, const sm9_fp12_t a)
{
sm9_fp4_t h0, h1, h2, t;
Expand Down
Loading

0 comments on commit 4a7c65e

Please sign in to comment.