From 762a44ae7181ff89c3bbbecac01beab6d334b6c6 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Wed, 13 Mar 2024 11:02:41 -0500 Subject: [PATCH] Add bignum_mont{sqr,mul}_p256_neon for Arm This patch adds `bignum_mont{sqr,mul}_p256_neon` functions. These are vectorized and instruction-rescheduled versions of `bignum_mont{sqr,mul}_p256`. They are verified using the equivalence checking tactics. A new bash script `tools/external/slothy.sh` is added to help reproduce the optimized output. The 'intermediate' functions of the two functions are written as comments in the two assembly files. Additionally, - A new instruction `umull2` is formalized add added to the simulator in order to verify the new functions. - Old `*_neon` functions' proofs are refactored a bit. --- arm/Makefile | 2 + arm/allowed_asm | 10 + arm/p256/Makefile | 2 + arm/p256/bignum_montmul_p256_neon.S | 461 +++++++++++ arm/p256/bignum_montsqr_p256_neon.S | 324 ++++++++ arm/proofs/arm.ml | 27 + arm/proofs/bignum_montmul_p256.ml | 122 ++- arm/proofs/bignum_montmul_p256_neon.ml | 820 +++++++++++++++++++ arm/proofs/bignum_montsqr_p256.ml | 88 +- arm/proofs/bignum_montsqr_p256_neon.ml | 874 ++++++++++++++++++++ arm/proofs/bignum_mul_8_16.ml | 19 +- arm/proofs/bignum_mul_8_16_neon.ml | 324 +++----- arm/proofs/bignum_sqr_8_16.ml | 19 +- arm/proofs/bignum_sqr_8_16_neon.ml | 322 +++----- arm/proofs/decode.ml | 9 +- arm/proofs/equiv.ml | 1040 ++++++++++++++++++------ arm/proofs/instruction.ml | 21 +- arm/proofs/neon_helper.ml | 92 ++- arm/proofs/simulator.ml | 4 + benchmarks/benchmark.c | 6 + common/misc.ml | 20 +- common/relational2.ml | 146 +++- include/s2n-bignum-c89.h | 2 + include/s2n-bignum.h | 2 + tests/test.c | 104 +-- tools/external/slothy.sh | 112 +++ 26 files changed, 4127 insertions(+), 845 deletions(-) create mode 100644 arm/p256/bignum_montmul_p256_neon.S create mode 100644 arm/p256/bignum_montsqr_p256_neon.S create mode 100644 arm/proofs/bignum_montmul_p256_neon.ml create mode 100644 arm/proofs/bignum_montsqr_p256_neon.ml create mode 100755 tools/external/slothy.sh diff --git a/arm/Makefile b/arm/Makefile index c592eb21..61fa2e16 100644 --- a/arm/Makefile +++ b/arm/Makefile @@ -244,8 +244,10 @@ BIGNUM_OBJ = curve25519/bignum_add_p25519.o \ p256/bignum_mod_p256_4.o \ p256/bignum_montmul_p256.o \ p256/bignum_montmul_p256_alt.o \ + p256/bignum_montmul_p256_neon.o \ p256/bignum_montsqr_p256.o \ p256/bignum_montsqr_p256_alt.o \ + p256/bignum_montsqr_p256_neon.o \ p256/bignum_mux_4.o \ p256/bignum_neg_p256.o \ p256/bignum_nonzero_4.o \ diff --git a/arm/allowed_asm b/arm/allowed_asm index 89c9dd87..85ca3e4a 100644 --- a/arm/allowed_asm +++ b/arm/allowed_asm @@ -24,6 +24,7 @@ : lsr$ : madd$ : mov$ +: movi$ : movk$ : mul$ : mvn$ @@ -33,14 +34,23 @@ : ngcs$ : orr$ : ret$ +: rev64$ : sbc$ : sbcs$ +: shl$ : stp$ : str$ : strb$ : sub$ : subs$ : tst$ +: uaddlp$ +: umlal$ : umulh$ : umull$ +: umull2$ +: usra$ +: uzp1$ +: uzp2$ +: xtn$ : $ diff --git a/arm/p256/Makefile b/arm/p256/Makefile index 6cdc3bae..9ea22174 100644 --- a/arm/p256/Makefile +++ b/arm/p256/Makefile @@ -35,8 +35,10 @@ OBJ = bignum_add_p256.o \ bignum_mod_p256_4.o \ bignum_montmul_p256.o \ bignum_montmul_p256_alt.o \ + bignum_montmul_p256_neon.o \ bignum_montsqr_p256.o \ bignum_montsqr_p256_alt.o \ + bignum_montsqr_p256_neon.o \ bignum_mux_4.o \ bignum_neg_p256.o \ bignum_nonzero_4.o \ diff --git a/arm/p256/bignum_montmul_p256_neon.S b/arm/p256/bignum_montmul_p256_neon.S new file mode 100644 index 00000000..71d629ce --- /dev/null +++ b/arm/p256/bignum_montmul_p256_neon.S @@ -0,0 +1,461 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery multiply, z := (x * y / 2^256) mod p_256 +// Inputs x[4], y[4]; output z[4] +// +// extern void bignum_montmul_p256_neon +// (uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4]); +// +// Does z := (2^{-256} * x * y) mod p_256, assuming that the inputs x and y +// satisfy x * y <= 2^256 * p_256 (in particular this is true if we are in +// the "usual" case x < p_256 and y < p_256). +// +// Standard ARM ABI: X0 = z, X1 = x, X2 = y +// ---------------------------------------------------------------------------- + +// bignum_montmul_p256_neon is functionally equivalent to bignum_montmul_p256. +// It is written in a way that +// 1. A subset of scalar multiplications in bignum_montmul_p256 are carefully +// chosen and vectorized +// 2. The vectorized assembly is rescheduled using the SLOTHY superoptimizer. +// https://github.com/slothy-optimizer/slothy +// +// The output program of step 1. is as follows: +// +// ldp x7, x13, [x1] +// ldr q16, [x1] +// ldp x9, x15, [x1, #16] +// ldp x14, x4, [x2] +// ldr q19, [x2] +// ldp x12, x16, [x2, #16] +// ldr q29, [x1, #16] +// ldr q30, [x2, #16] +// uzp1 v17.4S, v19.4S, v16.4S +// rev64 v18.4S, v19.4S +// uzp1 v28.4S, v16.4S, v16.4S +// mul v24.4S, v18.4S, v16.4S +// uaddlp v18.2D, v24.4S +// shl v16.2D, v18.2D, #32 +// umlal v16.2D, v28.2S, v17.2S +// mov x2, v16.d[0] +// mov x1, v16.d[1] +// umulh x5, x7, x14 +// adds x17, x2, x1 +// umulh x3, x13, x4 +// adcs x8, x5, x3 +// adcs x10, x3, xzr +// adds x5, x5, x17 +// adcs x1, x1, x8 +// adcs x8, x10, xzr +// subs x17, x7, x13 +// cneg x3, x17, cc +// csetm x11, cc +// subs x10, x4, x14 +// cneg x6, x10, cc +// mul x17, x3, x6 +// umulh x6, x3, x6 +// cinv x11, x11, cc +// eor x17, x17, x11 +// eor x3, x6, x11 +// cmn x11, #0x1 +// adcs x5, x5, x17 +// adcs x10, x1, x3 +// adc x1, x8, x11 +// lsl x3, x2, #32 +// subs x17, x2, x3 +// lsr x11, x2, #32 +// sbc x8, x2, x11 +// adds x2, x5, x3 +// adcs x6, x10, x11 +// adcs x3, x1, x17 +// adc x10, x8, xzr +// lsl x5, x2, #32 +// subs x17, x2, x5 +// lsr x11, x2, #32 +// sbc x8, x2, x11 +// adds x2, x6, x5 +// adcs x6, x3, x11 +// adcs x1, x10, x17 +// adc x17, x8, xzr +// stp x2, x6, [x0] // @slothy:writes=buffer0 +// stp x1, x17, [x0, #16] // @slothy:writes=buffer16 +// movi v28.2D, #0x00000000ffffffff +// uzp2 v22.4S, v30.4S, v30.4S +// xtn v4.2S, v29.2D +// xtn v27.2S, v30.2D +// rev64 v23.4S, v30.4S +// umull v17.2D, v4.2S, v27.2S +// umull v7.2D, v4.2S, v22.2S +// uzp2 v16.4S, v29.4S, v29.4S +// mul v29.4S, v23.4S, v29.4S +// usra v7.2D, v17.2D, #32 +// umull v30.2D, v16.2S, v22.2S +// uaddlp v20.2D, v29.4S +// and v18.16B, v7.16B, v28.16B +// umlal v18.2D, v16.2S, v27.2S +// shl v16.2D, v20.2D, #32 +// usra v30.2D, v7.2D, #32 +// umlal v16.2D, v4.2S, v27.2S +// usra v30.2D, v18.2D, #32 +// mov x11, v16.d[0] +// mov x5, v16.d[1] +// mov x2, v30.d[0] +// adds x3, x11, x5 +// mov x17, v30.d[1] +// adcs x8, x2, x17 +// adcs x1, x17, xzr +// adds x17, x2, x3 +// adcs x8, x5, x8 +// adcs x1, x1, xzr +// subs x2, x9, x15 +// cneg x6, x2, cc +// csetm x3, cc +// subs x2, x16, x12 +// cneg x5, x2, cc +// mul x10, x6, x5 +// umulh x5, x6, x5 +// cinv x3, x3, cc +// eor x10, x10, x3 +// eor x6, x5, x3 +// cmn x3, #0x1 +// adcs x2, x17, x10 +// adcs x6, x8, x6 +// adc x5, x1, x3 +// subs x7, x9, x7 +// sbcs x3, x15, x13 +// ngc x17, xzr +// cmn x17, #0x1 +// eor x8, x7, x17 +// adcs x13, x8, xzr +// eor x15, x3, x17 +// adcs x1, x15, xzr +// subs x9, x14, x12 +// sbcs x14, x4, x16 +// ngc x3, xzr +// cmn x3, #0x1 +// eor x12, x9, x3 +// adcs x7, x12, xzr +// eor x12, x14, x3 +// adcs x12, x12, xzr +// eor x10, x17, x3 +// ldp x4, x15, [x0] // @slothy:reads=buffer0 +// adds x17, x11, x4 +// adcs x16, x2, x15 +// ldp x3, x15, [x0, #16] // @slothy:reads=buffer16 +// adcs x11, x6, x3 +// adcs x9, x5, x15 +// adc x14, xzr, xzr +// mul x6, x13, x7 +// mul x8, x1, x12 +// umulh x5, x13, x7 +// adds x3, x6, x8 +// umulh x2, x1, x12 +// adcs x4, x5, x2 +// adcs x15, x2, xzr +// adds x3, x5, x3 +// adcs x4, x8, x4 +// adcs x15, x15, xzr +// subs x1, x13, x1 +// cneg x8, x1, cc +// csetm x5, cc +// subs x1, x12, x7 +// cneg x2, x1, cc +// mul x7, x8, x2 +// umulh x2, x8, x2 +// cinv x13, x5, cc +// eor x7, x7, x13 +// eor x2, x2, x13 +// cmn x13, #0x1 +// adcs x3, x3, x7 +// adcs x4, x4, x2 +// adc x5, x15, x13 +// cmn x10, #0x1 +// eor x8, x6, x10 +// adcs x15, x8, x17 +// eor x2, x3, x10 +// adcs x2, x2, x16 +// eor x6, x4, x10 +// adcs x3, x6, x11 +// eor x7, x5, x10 +// adcs x1, x7, x9 +// adcs x13, x14, x10 +// adcs x12, x10, xzr +// adc x10, x10, xzr +// adds x5, x3, x17 +// adcs x8, x1, x16 +// adcs x13, x13, x11 +// adcs x6, x12, x9 +// adc x4, x10, x14 +// lsl x9, x15, #32 +// subs x7, x15, x9 +// lsr x1, x15, #32 +// sbc x14, x15, x1 +// adds x10, x2, x9 +// adcs x15, x5, x1 +// adcs x5, x8, x7 +// adc x7, x14, xzr +// lsl x12, x10, #32 +// subs x17, x10, x12 +// lsr x9, x10, #32 +// sbc x3, x10, x9 +// adds x12, x15, x12 +// adcs x5, x5, x9 +// adcs x14, x7, x17 +// adc x2, x3, xzr +// adds x14, x13, x14 +// adcs x6, x6, x2 +// adc x17, x4, xzr +// add x7, x17, #0x1 +// lsl x16, x7, #32 +// adds x3, x6, x16 +// adc x1, x17, xzr +// neg x15, x7 +// sub x13, x16, #0x1 +// subs x9, x12, x15 +// sbcs x8, x5, x13 +// sbcs x15, x14, xzr +// sbcs x3, x3, x7 +// sbcs x7, x1, x7 +// adds x4, x9, x7 +// mov x6, #0xffffffff +// and x17, x6, x7 +// adcs x8, x8, x17 +// adcs x5, x15, xzr +// mov x10, #0xffffffff00000001 +// and x1, x10, x7 +// adc x12, x3, x1 +// stp x4, x8, [x0] // @slothy:writes=buffer0 +// stp x5, x12, [x0, #16] // @slothy:writes=buffer16 +// ret +// +// The bash script used for step 2 is as follows: +// +// # Store the assembly instructions except the last 'ret' as, say, 'input.S' +// export OUTPUTS="[hint_buffer0,hint_buffer16]" +// export RESERVED_REGS="[x18,x19,x20,x21,x22,x23,x24,x25,x26,x27,x28,x29,x30,sp,q8,q9,q10,q11,q12,q13,q14,q15,v8,v9,v10,v11,v12,v13,v14,v15]" +// /tools/external/slothy.sh input.S my_out_dir +// # my_out_dir/3.opt.s is the optimized assembly. Its output may differ +// # from this file since the sequence is non-deterministically chosen. +// # Please add 'ret' at the end of the output assembly. + + +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p256_neon) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p256_neon) + .text + .balign 4 + +S2N_BN_SYMBOL(bignum_montmul_p256_neon): + + ldr q20, [x2] + ldp x7, x17, [x1] + ldr q0, [x1] + ldp x6, x10, [x2] + ldp x11, x15, [x1, #16] + rev64 v16.4S, v20.4S + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4S, v16.4S, v0.4S + umulh x12, x17, x10 + uzp1 v28.4S, v20.4S, v0.4S + subs x14, x11, x7 + ldr q20, [x2, #16] + sbcs x5, x15, x17 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2D, v16.4S + umulh x4, x7, x6 + uzp1 v21.4S, v0.4S, v0.4S + cneg x11, x8, cc + shl v17.2D, v27.2D, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2D, v21.2S, v28.2S + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [x1, #16] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2S, v20.2D + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4S, v20.4S, v20.4S + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2S, v28.2D + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [x2, #16] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x9, x3, x13 + adcs x3, x8, x7 + umulh x8, x14, x11 + umull v21.2D, v0.2S, v1.2S + adcs x12, x10, x12 + umull v3.2D, v0.2S, v16.2S + adc x15, x15, xzr + rev64 v24.4S, v20.4S + stp x12, x15, [x0, #16] + movi v2.2D, #0x00000000ffffffff + mul x10, x14, x11 + mul v4.4S, v24.4S, v28.4S + subs x13, x14, x5 + uzp2 v19.4S, v28.4S, v28.4S + csetm x15, cc + usra v3.2D, v21.2D, #32 + mul x7, x5, x1 + umull v21.2D, v19.2S, v16.2S + cneg x13, x13, cc + uaddlp v5.2D, v4.4S + subs x11, x1, x11 + and v16.16B, v3.16B, v2.16B + umulh x5, x5, x1 + shl v24.2D, v5.2D, #32 + cneg x11, x11, cc + umlal v16.2D, v19.2S, v1.2S + cinv x12, x15, cc + umlal v24.2D, v0.2S, v1.2S + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + stp x9, x3, [x0] + usra v21.2D, v3.2D, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2D, v16.2D, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + ldp x15, x8, [x0] + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + ldp x9, x13, [x0, #16] + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x15 + adcs x15, x16, x8 + eor x5, x17, x4 + adcs x9, x1, x9 + eor x1, x10, x5 + adcs x16, x2, x13 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x11, x11, x13 + and x1, x1, x13 + adcs x4, x4, x1 + and x1, x12, x13 + stp x11, x4, [x0] + adcs x4, x7, xzr + adc x1, x17, x1 + stp x4, x1, [x0, #16] + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/arm/p256/bignum_montsqr_p256_neon.S b/arm/p256/bignum_montsqr_p256_neon.S new file mode 100644 index 00000000..20b37556 --- /dev/null +++ b/arm/p256/bignum_montsqr_p256_neon.S @@ -0,0 +1,324 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery square, z := (x^2 / 2^256) mod p_256 +// Input x[4]; output z[4] +// +// extern void bignum_montsqr_p256_neon +// (uint64_t z[static 4], uint64_t x[static 4]); +// +// Does z := (x^2 / 2^256) mod p_256, assuming x^2 <= 2^256 * p_256, which is +// guaranteed in particular if x < p_256 initially (the "intended" case). +// +// Standard ARM ABI: X0 = z, X1 = x +// ---------------------------------------------------------------------------- + +// bignum_montsqr_p256_neon is functionally equivalent to bignum_montsqr_p256. +// It is written in a way that +// 1. A subset of scalar multiplications in bignum_montsqr_p256 are carefully +// chosen and vectorized +// 2. The vectorized assembly is rescheduled using the SLOTHY superoptimizer. +// https://github.com/slothy-optimizer/slothy +// +// The output program of step 1. is as follows: +// +// ldp x7, x3, [x1] +// ldr q6, [x1] +// ldp x9, x8, [x1, #16] +// ldr q18, [x1, #16] +// ldr q27, [x1] +// umull v16.2D, v27.2S, v27.2S +// umull2 v17.2D, v27.4S, v27.4S +// xtn v30.2S, v27.2D +// uzp2 v27.4S, v27.4S, v27.4S +// umull v27.2D, v27.2S, v30.2S +// mov x6, v16.d[0] +// mov x12, v16.d[1] +// mov x13, v17.d[0] +// mov x1, v17.d[1] +// mov x15, v27.d[0] +// mov x10, v27.d[1] +// adds x4, x6, x15, lsl #33 +// lsr x6, x15, #31 +// adc x15, x12, x6 +// adds x13, x13, x10, lsl #33 +// lsr x6, x10, #31 +// adc x12, x1, x6 +// mul x6, x7, x3 +// umulh x1, x7, x3 +// adds x5, x15, x6, lsl #1 +// extr x6, x1, x6, #63 +// adcs x10, x13, x6 +// lsr x6, x1, #63 +// adc x15, x12, x6 +// lsl x6, x4, #32 +// subs x13, x4, x6 +// lsr x12, x4, #32 +// sbc x1, x4, x12 +// adds x6, x5, x6 +// adcs x5, x10, x12 +// adcs x10, x15, x13 +// adc x15, x1, xzr +// lsl x13, x6, #32 +// subs x12, x6, x13 +// lsr x1, x6, #32 +// sbc x6, x6, x1 +// adds x16, x5, x13 +// adcs x11, x10, x1 +// adcs x2, x15, x12 +// adc x17, x6, xzr +// uzp1 v30.4S, v18.4S, v6.4S +// rev64 v27.4S, v18.4S +// uzp1 v18.4S, v6.4S, v6.4S +// mul v27.4S, v27.4S, v6.4S +// uaddlp v5.2D, v27.4S +// shl v6.2D, v5.2D, #32 +// umlal v6.2D, v18.2S, v30.2S +// mov x4, v6.d[0] +// mov x5, v6.d[1] +// umulh x10, x7, x9 +// subs x6, x7, x3 +// cneg x13, x6, cc +// csetm x12, cc +// subs x6, x8, x9 +// cneg x6, x6, cc +// mul x1, x13, x6 +// umulh x6, x13, x6 +// cinv x15, x12, cc +// eor x12, x1, x15 +// eor x13, x6, x15 +// adds x1, x4, x10 +// adc x6, x10, xzr +// umulh x3, x3, x8 +// adds x1, x1, x5 +// adcs x6, x6, x3 +// adc x3, x3, xzr +// adds x6, x6, x5 +// adc x3, x3, xzr +// cmn x15, #0x1 +// adcs x12, x1, x12 +// adcs x1, x6, x13 +// adc x3, x3, x15 +// adds x6, x4, x4 +// adcs x13, x12, x12 +// adcs x12, x1, x1 +// adcs x1, x3, x3 +// adc x3, xzr, xzr +// adds x6, x6, x16 +// adcs x5, x13, x11 +// adcs x10, x12, x2 +// adcs x15, x1, x17 +// adc x13, x3, xzr +// lsl x3, x6, #32 +// subs x12, x6, x3 +// lsr x1, x6, #32 +// sbc x6, x6, x1 +// adds x3, x5, x3 +// adcs x5, x10, x1 +// adcs x15, x15, x12 +// adcs x13, x13, x6 +// adc x10, xzr, xzr +// lsl x6, x3, #32 +// subs x12, x3, x6 +// lsr x1, x3, #32 +// sbc x3, x3, x1 +// adds x6, x5, x6 +// adcs x15, x15, x1 +// adcs x13, x13, x12 +// adcs x12, x10, x3 +// adc x1, xzr, xzr +// mul x3, x9, x9 +// adds x5, x6, x3 +// mul x6, x8, x8 +// umulh x3, x9, x9 +// adcs x15, x15, x3 +// adcs x13, x13, x6 +// umulh x3, x8, x8 +// adcs x12, x12, x3 +// adc x1, x1, xzr +// mul x6, x9, x8 +// umulh x3, x9, x8 +// adds x8, x6, x6 +// adcs x9, x3, x3 +// adc x3, xzr, xzr +// adds x10, x15, x8 +// adcs x15, x13, x9 +// adcs x13, x12, x3 +// adcs x12, x1, xzr +// mov x3, #0xffffffff +// adds x6, x5, #0x1 +// sbcs x8, x10, x3 +// mov x3, #0xffffffff00000001 +// sbcs x9, x15, xzr +// sbcs x1, x13, x3 +// sbcs xzr, x12, xzr +// csel x6, x6, x5, cs +// csel x8, x8, x10, cs +// csel x9, x9, x15, cs +// csel x3, x1, x13, cs +// stp x6, x8, [x0] // @slothy:writes=buffer0 +// stp x9, x3, [x0, #16] // @slothy:writes=buffer16 +// ret +// +// The bash script used for step 2 is as follows: +// +// # Store the assembly instructions except the last 'ret' as, say, 'input.S' +// export OUTPUTS="[hint_buffer0,hint_buffer16]" +// export RESERVED_REGS="[x18,x19,x20,x21,x22,x23,x24,x25,x26,x27,x28,x29,x30,sp,q8,q9,q10,q11,q12,q13,q14,q15,v8,v9,v10,v11,v12,v13,v14,v15]" +// /tools/external/slothy.sh input.S my_out_dir +// # my_out_dir/3.opt.s is the optimized assembly. Its output may differ +// # from this file since the sequence is non-deterministically chosen. +// # Please add 'ret' at the end of the output assembly. + + +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p256_neon) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p256_neon) + .text + .balign 4 + +S2N_BN_SYMBOL(bignum_montsqr_p256_neon): + + ldr q19, [x1] + ldp x9, x13, [x1] + ldr q23, [x1, #16] + ldr q0, [x1] + ldp x1, x10, [x1, #16] + uzp2 v29.4S, v19.4S, v19.4S + xtn v4.2S, v19.2D + umulh x8, x9, x13 + rev64 v20.4S, v23.4S + umull v16.2D, v19.2S, v19.2S + umull v1.2D, v29.2S, v4.2S + mul v20.4S, v20.4S, v0.4S + subs x14, x9, x13 + umulh x15, x9, x1 + mov x16, v16.d[1] + umull2 v4.2D, v19.4S, v19.4S + mov x4, v16.d[0] + uzp1 v17.4S, v23.4S, v0.4S + uaddlp v19.2D, v20.4S + lsr x7, x8, #63 + mul x11, x9, x13 + mov x12, v1.d[0] + csetm x5, cc + cneg x6, x14, cc + mov x3, v4.d[1] + mov x14, v4.d[0] + subs x2, x10, x1 + mov x9, v1.d[1] + cneg x17, x2, cc + cinv x2, x5, cc + adds x5, x4, x12, lsl #33 + extr x4, x8, x11, #63 + lsr x8, x12, #31 + uzp1 v20.4S, v0.4S, v0.4S + shl v19.2D, v19.2D, #32 + adc x16, x16, x8 + adds x8, x14, x9, lsl #33 + lsr x14, x9, #31 + lsl x9, x5, #32 + umlal v19.2D, v20.2S, v17.2S + adc x14, x3, x14 + adds x16, x16, x11, lsl #1 + lsr x3, x5, #32 + umulh x12, x6, x17 + adcs x4, x8, x4 + adc x11, x14, x7 + subs x8, x5, x9 + sbc x5, x5, x3 + adds x16, x16, x9 + mov x14, v19.d[0] + mul x17, x6, x17 + adcs x3, x4, x3 + lsl x7, x16, #32 + umulh x13, x13, x10 + adcs x11, x11, x8 + lsr x8, x16, #32 + adc x5, x5, xzr + subs x9, x16, x7 + sbc x16, x16, x8 + adds x7, x3, x7 + mov x3, v19.d[1] + adcs x6, x11, x8 + umulh x11, x1, x10 + adcs x5, x5, x9 + eor x8, x12, x2 + adc x9, x16, xzr + adds x16, x14, x15 + adc x15, x15, xzr + adds x12, x16, x3 + eor x16, x17, x2 + mul x4, x1, x10 + adcs x15, x15, x13 + adc x17, x13, xzr + adds x15, x15, x3 + adc x3, x17, xzr + cmn x2, #0x1 + mul x17, x10, x10 + adcs x12, x12, x16 + adcs x16, x15, x8 + umulh x10, x10, x10 + adc x2, x3, x2 + adds x14, x14, x14 + adcs x12, x12, x12 + adcs x16, x16, x16 + adcs x2, x2, x2 + adc x15, xzr, xzr + adds x14, x14, x7 + mul x3, x1, x1 + adcs x12, x12, x6 + lsr x7, x14, #32 + adcs x16, x16, x5 + lsl x5, x14, #32 + umulh x13, x1, x1 + adcs x2, x2, x9 + mov x6, #0xffffffff + adc x15, x15, xzr + adds x8, x4, x4 + adcs x1, x11, x11 + mov x11, #0xffffffff00000001 + adc x4, xzr, xzr + subs x9, x14, x5 + sbc x14, x14, x7 + adds x12, x12, x5 + adcs x16, x16, x7 + lsl x5, x12, #32 + lsr x7, x12, #32 + adcs x2, x2, x9 + adcs x14, x15, x14 + adc x15, xzr, xzr + subs x9, x12, x5 + sbc x12, x12, x7 + adds x16, x16, x5 + adcs x2, x2, x7 + adcs x14, x14, x9 + adcs x12, x15, x12 + adc x15, xzr, xzr + adds x16, x16, x3 + adcs x2, x2, x13 + adcs x14, x14, x17 + adcs x12, x12, x10 + adc x15, x15, xzr + adds x2, x2, x8 + adcs x14, x14, x1 + adcs x12, x12, x4 + adcs x15, x15, xzr + adds x3, x16, #0x1 + sbcs x5, x2, x6 + sbcs x8, x14, xzr + sbcs x11, x12, x11 + sbcs xzr, x15, xzr + csel x16, x3, x16, cs + csel x14, x8, x14, cs + csel x12, x11, x12, cs + csel x2, x5, x2, cs + stp x14, x12, [x0, #16] + stp x16, x2, [x0] + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/arm/proofs/arm.ml b/arm/proofs/arm.ml index 201ef1c3..0b1b3ab4 100644 --- a/arm/proofs/arm.ml +++ b/arm/proofs/arm.ml @@ -91,6 +91,33 @@ let ARM_MK_EXEC_RULE th0 = LENGTH; LENGTH_APPEND] THENC NUM_REDUCE_CONV) (rhs (concl th1)) in CONJ (TRANS th1 th2) (ARM_DECODES_THM th0);; + +(* Take a slice of a machine code using SUB_LIST. + Returns a new definition of sliced machine code nmaed new_mc_name, + its SUB_LIST_CONV-reduced version, and + its EXEC version created by ARM_MK_EXEC_RULE. *) +let mk_sublist_of_mc (new_mc_name:string) (mc_th:thm) + (ofs_and_len:term*term) (execth:thm): thm * thm * thm = + let type_check (t:term) (ty:hol_type): unit = + if type_of t <> ty then + failwith (Printf.sprintf "`%s` must have type `%s` but has `%s`" + (string_of_term t) (string_of_type ty) + (string_of_type (type_of t))) + else + () in + let _ = type_check (fst ofs_and_len) `:num` in + let _ = type_check (snd ofs_and_len) `:num` in + let mc = fst (dest_eq (concl mc_th)) in + let new_mc = mk_var(new_mc_name,`:((8)word)list`) in + let new_mc_def = define (mk_eq (new_mc, + list_mk_comb (`SUB_LIST:num#num->((8)word)list->((8)word)list`, + [mk_pair ofs_and_len;mc]))) in + let new_mc = CONV_RULE (REWRITE_CONV [execth] THENC + ONCE_DEPTH_CONV NUM_SUB_CONV THENC + REWRITE_CONV [mc_th] THENC + RAND_CONV SUB_LIST_CONV) new_mc_def in + new_mc_def,new_mc,(ARM_MK_EXEC_RULE (new_mc));; + (* ------------------------------------------------------------------------- *) (* For ARM this is a trivial function. *) (* ------------------------------------------------------------------------- *) diff --git a/arm/proofs/bignum_montmul_p256.ml b/arm/proofs/bignum_montmul_p256.ml index 35af0526..e65ff29c 100644 --- a/arm/proofs/bignum_montmul_p256.ml +++ b/arm/proofs/bignum_montmul_p256.ml @@ -193,6 +193,15 @@ let bignum_montmul_p256_mc = let BIGNUM_MONTMUL_P256_EXEC = ARM_MK_EXEC_RULE bignum_montmul_p256_mc;; +(* bignum_montmul_p256_mc without ret. *) +let bignum_montmul_p256_core_mc_def, + bignum_montmul_p256_core_mc, + BIGNUM_MONTMUL_P256_CORE_EXEC = + mk_sublist_of_mc "bignum_montmul_p256_core_mc" + bignum_montmul_p256_mc + (`0`,`LENGTH bignum_montmul_p256_mc - 4`) + BIGNUM_MONTMUL_P256_EXEC;; + (* ------------------------------------------------------------------------- *) (* Proof. *) (* ------------------------------------------------------------------------- *) @@ -233,11 +242,11 @@ let p256shortredlemma = prove n < q * p_256 + p_256`, CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN REWRITE_TAC[p_256] THEN ARITH_TAC);; -let BIGNUM_MONTMUL_P256_CORRECT = time prove +let BIGNUM_MONTMUL_P256_CORE_CORRECT = time prove (`!z x y a b pc. nonoverlapping (word pc,0x2c0) (z,8 * 4) ==> ensures arm - (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_mc /\ + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_core_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,4) s = a /\ @@ -258,7 +267,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** Globalize the a * b <= 2 EXP 256 * p_256 assumption ***) ASM_CASES_TAC `a * b <= 2 EXP 256 * p_256` THENL - [ASM_REWRITE_TAC[]; ARM_SIM_TAC BIGNUM_MONTMUL_P256_EXEC (1--175)] THEN + [ASM_REWRITE_TAC[]; ARM_SIM_TAC BIGNUM_MONTMUL_P256_CORE_EXEC (1--175)] THEN ENSURES_INIT_TAC "s0" THEN BIGNUM_DIGITIZE_TAC "x_" `bignum_from_memory (x,4) s0` THEN BIGNUM_DIGITIZE_TAC "y_" `bignum_from_memory (y,4) s0` THEN @@ -266,7 +275,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** First ADK block multiplying lower halves. ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC ([5;6;8] @ (10--14) @ [20] @ (26--28)) (1--28) THEN SUBGOAL_THEN `bignum_of_wordlist [mullo_s5; sum_s26; sum_s27; sum_s28] = @@ -295,7 +304,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** the two Montgomery steps on the low half ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC (29--46) (29--46) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC (29--46) (29--46) THEN SUBGOAL_THEN `2 EXP 128 * bignum_of_wordlist [sum_s41; sum_s42; sum_s43; sum_s44] = bignum_of_wordlist[x_0;x_1] * bignum_of_wordlist[y_0;y_1] + @@ -311,7 +320,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** Second ADK block multiplying upper halves. ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC ([47;48;50] @ (52--56) @ [62] @ (68--70)) (47--70) THEN SUBGOAL_THEN `bignum_of_wordlist [mullo_s47; sum_s68; sum_s69; sum_s70] = @@ -340,7 +349,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** First absolute difference computation ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC [71;72;76;78] (71--78) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC [71;72;76;78] (71--78) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; WORD_RULE `word_sub (word 0) x = word_neg x`]) THEN SUBGOAL_THEN @@ -376,7 +385,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** Second absolute difference computation ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC [79;80;84;86] (79--86) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC [79;80;84;86] (79--86) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; WORD_RULE `word_sub (word 0) x = word_neg x`]) THEN SUBGOAL_THEN @@ -412,7 +421,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** Collective sign-magnitude representation of middle product ***) - ARM_STEPS_TAC BIGNUM_MONTMUL_P256_EXEC [87] THEN + ARM_STEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC [87] THEN RULE_ASSUM_TAC(REWRITE_RULE[WORD_XOR_MASKS]) THEN ABBREV_TAC `msgn <=> ~(bignum_of_wordlist[x_2;x_3] < bignum_of_wordlist[x_0;x_1] <=> @@ -443,7 +452,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** the H + L' addition (a result that we then use twice) ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC (88--94) (88--94) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC (88--94) (88--94) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES]) THEN SUBGOAL_THEN `bignum_of_wordlist @@ -460,7 +469,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** Third and final ADK block computing the mid-product ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC ([95;96;98] @ (100--104) @ [110] @ (116--118)) (95--118) THEN SUBGOAL_THEN `bignum_of_wordlist[mullo_s95; sum_s116; sum_s117; sum_s118] = @@ -489,7 +498,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** Big net accumulation computation absorbing cases over sign ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC ([121;123;125] @ (127--135)) (119--135) THEN SUBGOAL_THEN `2 EXP 128 * @@ -534,7 +543,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** Last two Montgomery steps to get the pre-reduced result ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC (136--154) (136--154) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC (136--154) (136--154) THEN ABBREV_TAC `t = bignum_of_wordlist [sum_s148; sum_s149; sum_s152; sum_s153; sum_s154]` THEN @@ -597,7 +606,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove MP_TAC(SPEC `t:num` p256shortredlemma) THEN ASM_REWRITE_TAC[] THEN CONV_TAC(LAND_CONV let_CONV) THEN STRIP_TAC THEN - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC ([157;158] @ (161--165)) (155--165) THEN SUBGOAL_THEN `--(&2 pow 320) * &(bitval carry_s165) + @@ -635,7 +644,7 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove (*** The final corrective masked addition ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC [166;169;170;173] (166--175) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC [166;169;170;173] (166--175) THEN ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN ASM_REWRITE_TAC[] THEN TRANS_TAC EQ_TRANS `t MOD p_256` THEN CONJ_TAC THENL @@ -664,6 +673,34 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove CONV_TAC WORD_REDUCE_CONV THEN DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC);; +let BIGNUM_MONTMUL_P256_CORRECT = time prove + (`!z x y a b pc. + nonoverlapping (word pc,0x2c0) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,4) s = a /\ + bignum_from_memory (y,4) s = b) + (\s. read PC s = word (pc + 0x2bc) /\ + (a * b <= 2 EXP 256 * p_256 + ==> bignum_from_memory (z,4) s = + (inverse_mod p_256 (2 EXP 256) * a * b) MOD p_256)) + (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 4)] ,, + MAYCHANGE SOME_FLAGS)`, + REPEAT STRIP_TAC THEN + FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_MONTMUL_P256_CORE_CORRECT th)) THEN + REWRITE_TAC[ensures] THEN + DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN + MAP_EVERY EXISTS_TAC [`x:int64`;`y:int64`] THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_montmul_p256_core_mc_def;BIGNUM_MONTMUL_P256_EXEC; + WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN + CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN + ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN + ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + let BIGNUM_MONTMUL_P256_SUBROUTINE_CORRECT = time prove (`!z x y a b pc returnaddress. nonoverlapping (word pc,0x2c0) (z,8 * 4) @@ -697,11 +734,11 @@ let p256genshortredlemma = prove n < q * p_256 + p_256`, CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN REWRITE_TAC[p_256] THEN ARITH_TAC);; -let BIGNUM_AMONTMUL_P256_CORRECT = time prove +let BIGNUM_AMONTMUL_P256_CORE_CORRECT = time prove (`!z x y a b pc. nonoverlapping (word pc,0x2c0) (z,8 * 4) ==> ensures arm - (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_mc /\ + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_core_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,4) s = a /\ @@ -724,7 +761,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** First ADK block multiplying lower halves. ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC ([5;6;8] @ (10--14) @ [20] @ (26--28)) (1--28) THEN SUBGOAL_THEN `bignum_of_wordlist [mullo_s5; sum_s26; sum_s27; sum_s28] = @@ -753,7 +790,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** the two Montgomery steps on the low half ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC (29--46) (29--46) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC (29--46) (29--46) THEN SUBGOAL_THEN `2 EXP 128 * bignum_of_wordlist [sum_s41; sum_s42; sum_s43; sum_s44] = bignum_of_wordlist[x_0;x_1] * bignum_of_wordlist[y_0;y_1] + @@ -771,7 +808,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** Second ADK block multiplying upper halves. ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC ([47;48;50] @ (52--56) @ [62] @ (68--70)) (47--70) THEN SUBGOAL_THEN `bignum_of_wordlist [mullo_s47; sum_s68; sum_s69; sum_s70] = @@ -800,7 +837,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** First absolute difference computation ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC [71;72;76;78] (71--78) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC [71;72;76;78] (71--78) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; WORD_RULE `word_sub (word 0) x = word_neg x`]) THEN SUBGOAL_THEN @@ -836,7 +873,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** Second absolute difference computation ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC [79;80;84;86] (79--86) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC [79;80;84;86] (79--86) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; WORD_RULE `word_sub (word 0) x = word_neg x`]) THEN SUBGOAL_THEN @@ -872,7 +909,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** Collective sign-magnitude representation of middle product ***) - ARM_STEPS_TAC BIGNUM_MONTMUL_P256_EXEC [87] THEN + ARM_STEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC [87] THEN RULE_ASSUM_TAC(REWRITE_RULE[WORD_XOR_MASKS]) THEN ABBREV_TAC `msgn <=> ~(bignum_of_wordlist[x_2;x_3] < bignum_of_wordlist[x_0;x_1] <=> @@ -903,7 +940,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** the H + L' addition (a result that we then use twice) ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC (88--94) (88--94) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC (88--94) (88--94) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES]) THEN SUBGOAL_THEN `bignum_of_wordlist @@ -920,7 +957,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** Third and final ADK block computing the mid-product ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC ([95;96;98] @ (100--104) @ [110] @ (116--118)) (95--118) THEN SUBGOAL_THEN `bignum_of_wordlist[mullo_s95; sum_s116; sum_s117; sum_s118] = @@ -949,7 +986,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** Big net accumulation computation absorbing cases over sign ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC ([121;123;125] @ (127--135)) (119--135) THEN SUBGOAL_THEN `2 EXP 128 * @@ -994,7 +1031,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** Last two Montgomery steps to get the pre-reduced result ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC (136--154) (136--154) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC (136--154) (136--154) THEN ABBREV_TAC `t = bignum_of_wordlist [sum_s148; sum_s149; sum_s152; sum_s153; sum_s154]` THEN @@ -1054,7 +1091,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove MP_TAC(SPEC `t:num` p256genshortredlemma) THEN ASM_REWRITE_TAC[] THEN CONV_TAC(LAND_CONV let_CONV) THEN STRIP_TAC THEN - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC ([157;158] @ (161--165)) (155--165) THEN SUBGOAL_THEN `--(&2 pow 320) * &(bitval carry_s165) + @@ -1092,7 +1129,7 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove (*** The final corrective masked addition ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_EXEC [166;169;170;173] (166--175) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC [166;169;170;173] (166--175) THEN ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN MATCH_MP_TAC(MESON[CONG; MOD_MOD_REFL] `x = y MOD n ==> (x == y) (mod n)`) THEN @@ -1122,6 +1159,33 @@ let BIGNUM_AMONTMUL_P256_CORRECT = time prove CONV_TAC WORD_REDUCE_CONV THEN DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC);; +let BIGNUM_AMONTMUL_P256_CORRECT = time prove + (`!z x y a b pc. + nonoverlapping (word pc,0x2c0) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,4) s = a /\ + bignum_from_memory (y,4) s = b) + (\s. read PC s = word (pc + 0x2bc) /\ + (bignum_from_memory (z,4) s == + inverse_mod p_256 (2 EXP 256) * a * b) (mod p_256)) + (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 4)] ,, + MAYCHANGE SOME_FLAGS)`, + REPEAT STRIP_TAC THEN + FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_AMONTMUL_P256_CORE_CORRECT th)) THEN + REWRITE_TAC[ensures] THEN + DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN + MAP_EVERY EXISTS_TAC [`x:int64`;`y:int64`] THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_montmul_p256_core_mc_def;BIGNUM_MONTMUL_P256_EXEC; + WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN + CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN + ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN + ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + let BIGNUM_AMONTMUL_P256_SUBROUTINE_CORRECT = time prove (`!z x y a b pc returnaddress. nonoverlapping (word pc,0x2c0) (z,8 * 4) diff --git a/arm/proofs/bignum_montmul_p256_neon.ml b/arm/proofs/bignum_montmul_p256_neon.ml new file mode 100644 index 00000000..aa94cdd4 --- /dev/null +++ b/arm/proofs/bignum_montmul_p256_neon.ml @@ -0,0 +1,820 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(****************************************************************************** + The first program equivalence between the source program and + its SIMD-vectorized but not instruction-unscheduled program +******************************************************************************) + +needs "arm/proofs/bignum_montmul_p256.ml";; +needs "arm/proofs/equiv.ml";; +needs "arm/proofs/neon_helper.ml";; + +(* This is the intermediate program that is equivalent to both + bignum_montmul_p256 and bignum_montmul_p256_neon. This is a vectorized + version of bignum_montmul_p256 but instructions are unscheduled. *) + +let bignum_montmul_p256_interm1_ops:int list = [ + 0xa9403427; (* ldp x7, x13, [x1] *) + 0x3dc00030; (* ldr q16, [x1] *) + 0xa9413c29; (* ldp x9, x15, [x1, #16] *) + 0xa940104e; (* ldp x14, x4, [x2] *) + 0x3dc00053; (* ldr q19, [x2] *) + 0xa941404c; (* ldp x12, x16, [x2, #16] *) + 0x3dc0043d; (* ldr q29, [x1, #16] *) + 0x3dc0045e; (* ldr q30, [x2, #16] *) + 0x4e901a71; (* uzp1 v17.4s, v19.4s, v16.4s *) + 0x4ea00a72; (* rev64 v18.4s, v19.4s *) + 0x4e901a1c; (* uzp1 v28.4s, v16.4s, v16.4s *) + 0x4eb09e58; (* mul v24.4s, v18.4s, v16.4s *) + 0x6ea02b12; (* uaddlp v18.2d, v24.4s *) + 0x4f605650; (* shl v16.2d, v18.2d, #32 *) + 0x2eb18390; (* umlal v16.2d, v28.2s, v17.2s *) + 0x4e083e02; (* mov x2, v16.d[0] *) + 0x4e183e01; (* mov x1, v16.d[1] *) + 0x9bce7ce5; (* umulh x5, x7, x14 *) + 0xab010051; (* adds x17, x2, x1 *) + 0x9bc47da3; (* umulh x3, x13, x4 *) + 0xba0300a8; (* adcs x8, x5, x3 *) + 0xba1f006a; (* adcs x10, x3, xzr *) + 0xab1100a5; (* adds x5, x5, x17 *) + 0xba080021; (* adcs x1, x1, x8 *) + 0xba1f0148; (* adcs x8, x10, xzr *) + 0xeb0d00f1; (* subs x17, x7, x13 *) + 0xda912623; (* cneg x3, x17, cc // cc = lo, ul, last *) + 0xda9f23eb; (* csetm x11, cc // cc = lo, ul, last *) + 0xeb0e008a; (* subs x10, x4, x14 *) + 0xda8a2546; (* cneg x6, x10, cc // cc = lo, ul, last *) + 0x9b067c71; (* mul x17, x3, x6 *) + 0x9bc67c66; (* umulh x6, x3, x6 *) + 0xda8b216b; (* cinv x11, x11, cc // cc = lo, ul, last *) + 0xca0b0231; (* eor x17, x17, x11 *) + 0xca0b00c3; (* eor x3, x6, x11 *) + 0xb100057f; (* cmn x11, #0x1 *) + 0xba1100a5; (* adcs x5, x5, x17 *) + 0xba03002a; (* adcs x10, x1, x3 *) + 0x9a0b0101; (* adc x1, x8, x11 *) + 0xd3607c43; (* lsl x3, x2, #32 *) + 0xeb030051; (* subs x17, x2, x3 *) + 0xd360fc4b; (* lsr x11, x2, #32 *) + 0xda0b0048; (* sbc x8, x2, x11 *) + 0xab0300a2; (* adds x2, x5, x3 *) + 0xba0b0146; (* adcs x6, x10, x11 *) + 0xba110023; (* adcs x3, x1, x17 *) + 0x9a1f010a; (* adc x10, x8, xzr *) + 0xd3607c45; (* lsl x5, x2, #32 *) + 0xeb050051; (* subs x17, x2, x5 *) + 0xd360fc4b; (* lsr x11, x2, #32 *) + 0xda0b0048; (* sbc x8, x2, x11 *) + 0xab0500c2; (* adds x2, x6, x5 *) + 0xba0b0066; (* adcs x6, x3, x11 *) + 0xba110141; (* adcs x1, x10, x17 *) + 0x9a1f0111; (* adc x17, x8, xzr *) + 0xa9001802; (* stp x2, x6, [x0] *) + 0xa9014401; (* stp x1, x17, [x0, #16] *) + 0x6f00e5fc; (* movi v28.2d, #0xffffffff *) + 0x4e9e5bd6; (* uzp2 v22.4s, v30.4s, v30.4s *) + 0x0ea12ba4; (* xtn v4.2s, v29.2d *) + 0x0ea12bdb; (* xtn v27.2s, v30.2d *) + 0x4ea00bd7; (* rev64 v23.4s, v30.4s *) + 0x2ebbc091; (* umull v17.2d, v4.2s, v27.2s *) + 0x2eb6c087; (* umull v7.2d, v4.2s, v22.2s *) + 0x4e9d5bb0; (* uzp2 v16.4s, v29.4s, v29.4s *) + 0x4ebd9efd; (* mul v29.4s, v23.4s, v29.4s *) + 0x6f601627; (* usra v7.2d, v17.2d, #32 *) + 0x2eb6c21e; (* umull v30.2d, v16.2s, v22.2s *) + 0x6ea02bb4; (* uaddlp v20.2d, v29.4s *) + 0x4e3c1cf2; (* and v18.16b, v7.16b, v28.16b *) + 0x2ebb8212; (* umlal v18.2d, v16.2s, v27.2s *) + 0x4f605690; (* shl v16.2d, v20.2d, #32 *) + 0x6f6014fe; (* usra v30.2d, v7.2d, #32 *) + 0x2ebb8090; (* umlal v16.2d, v4.2s, v27.2s *) + 0x6f60165e; (* usra v30.2d, v18.2d, #32 *) + 0x4e083e0b; (* mov x11, v16.d[0] *) + 0x4e183e05; (* mov x5, v16.d[1] *) + 0x4e083fc2; (* mov x2, v30.d[0] *) + 0xab050163; (* adds x3, x11, x5 *) + 0x4e183fd1; (* mov x17, v30.d[1] *) + 0xba110048; (* adcs x8, x2, x17 *) + 0xba1f0221; (* adcs x1, x17, xzr *) + 0xab030051; (* adds x17, x2, x3 *) + 0xba0800a8; (* adcs x8, x5, x8 *) + 0xba1f0021; (* adcs x1, x1, xzr *) + 0xeb0f0122; (* subs x2, x9, x15 *) + 0xda822446; (* cneg x6, x2, cc // cc = lo, ul, last *) + 0xda9f23e3; (* csetm x3, cc // cc = lo, ul, last *) + 0xeb0c0202; (* subs x2, x16, x12 *) + 0xda822445; (* cneg x5, x2, cc // cc = lo, ul, last *) + 0x9b057cca; (* mul x10, x6, x5 *) + 0x9bc57cc5; (* umulh x5, x6, x5 *) + 0xda832063; (* cinv x3, x3, cc // cc = lo, ul, last *) + 0xca03014a; (* eor x10, x10, x3 *) + 0xca0300a6; (* eor x6, x5, x3 *) + 0xb100047f; (* cmn x3, #0x1 *) + 0xba0a0222; (* adcs x2, x17, x10 *) + 0xba060106; (* adcs x6, x8, x6 *) + 0x9a030025; (* adc x5, x1, x3 *) + 0xeb070127; (* subs x7, x9, x7 *) + 0xfa0d01e3; (* sbcs x3, x15, x13 *) + 0xda1f03f1; (* ngc x17, xzr *) + 0xb100063f; (* cmn x17, #0x1 *) + 0xca1100e8; (* eor x8, x7, x17 *) + 0xba1f010d; (* adcs x13, x8, xzr *) + 0xca11006f; (* eor x15, x3, x17 *) + 0xba1f01e1; (* adcs x1, x15, xzr *) + 0xeb0c01c9; (* subs x9, x14, x12 *) + 0xfa10008e; (* sbcs x14, x4, x16 *) + 0xda1f03e3; (* ngc x3, xzr *) + 0xb100047f; (* cmn x3, #0x1 *) + 0xca03012c; (* eor x12, x9, x3 *) + 0xba1f0187; (* adcs x7, x12, xzr *) + 0xca0301cc; (* eor x12, x14, x3 *) + 0xba1f018c; (* adcs x12, x12, xzr *) + 0xca03022a; (* eor x10, x17, x3 *) + 0xa9403c04; (* ldp x4, x15, [x0] *) + 0xab040171; (* adds x17, x11, x4 *) + 0xba0f0050; (* adcs x16, x2, x15 *) + 0xa9413c03; (* ldp x3, x15, [x0, #16] *) + 0xba0300cb; (* adcs x11, x6, x3 *) + 0xba0f00a9; (* adcs x9, x5, x15 *) + 0x9a1f03ee; (* adc x14, xzr, xzr *) + 0x9b077da6; (* mul x6, x13, x7 *) + 0x9b0c7c28; (* mul x8, x1, x12 *) + 0x9bc77da5; (* umulh x5, x13, x7 *) + 0xab0800c3; (* adds x3, x6, x8 *) + 0x9bcc7c22; (* umulh x2, x1, x12 *) + 0xba0200a4; (* adcs x4, x5, x2 *) + 0xba1f004f; (* adcs x15, x2, xzr *) + 0xab0300a3; (* adds x3, x5, x3 *) + 0xba040104; (* adcs x4, x8, x4 *) + 0xba1f01ef; (* adcs x15, x15, xzr *) + 0xeb0101a1; (* subs x1, x13, x1 *) + 0xda812428; (* cneg x8, x1, cc // cc = lo, ul, last *) + 0xda9f23e5; (* csetm x5, cc // cc = lo, ul, last *) + 0xeb070181; (* subs x1, x12, x7 *) + 0xda812422; (* cneg x2, x1, cc // cc = lo, ul, last *) + 0x9b027d07; (* mul x7, x8, x2 *) + 0x9bc27d02; (* umulh x2, x8, x2 *) + 0xda8520ad; (* cinv x13, x5, cc // cc = lo, ul, last *) + 0xca0d00e7; (* eor x7, x7, x13 *) + 0xca0d0042; (* eor x2, x2, x13 *) + 0xb10005bf; (* cmn x13, #0x1 *) + 0xba070063; (* adcs x3, x3, x7 *) + 0xba020084; (* adcs x4, x4, x2 *) + 0x9a0d01e5; (* adc x5, x15, x13 *) + 0xb100055f; (* cmn x10, #0x1 *) + 0xca0a00c8; (* eor x8, x6, x10 *) + 0xba11010f; (* adcs x15, x8, x17 *) + 0xca0a0062; (* eor x2, x3, x10 *) + 0xba100042; (* adcs x2, x2, x16 *) + 0xca0a0086; (* eor x6, x4, x10 *) + 0xba0b00c3; (* adcs x3, x6, x11 *) + 0xca0a00a7; (* eor x7, x5, x10 *) + 0xba0900e1; (* adcs x1, x7, x9 *) + 0xba0a01cd; (* adcs x13, x14, x10 *) + 0xba1f014c; (* adcs x12, x10, xzr *) + 0x9a1f014a; (* adc x10, x10, xzr *) + 0xab110065; (* adds x5, x3, x17 *) + 0xba100028; (* adcs x8, x1, x16 *) + 0xba0b01ad; (* adcs x13, x13, x11 *) + 0xba090186; (* adcs x6, x12, x9 *) + 0x9a0e0144; (* adc x4, x10, x14 *) + 0xd3607de9; (* lsl x9, x15, #32 *) + 0xeb0901e7; (* subs x7, x15, x9 *) + 0xd360fde1; (* lsr x1, x15, #32 *) + 0xda0101ee; (* sbc x14, x15, x1 *) + 0xab09004a; (* adds x10, x2, x9 *) + 0xba0100af; (* adcs x15, x5, x1 *) + 0xba070105; (* adcs x5, x8, x7 *) + 0x9a1f01c7; (* adc x7, x14, xzr *) + 0xd3607d4c; (* lsl x12, x10, #32 *) + 0xeb0c0151; (* subs x17, x10, x12 *) + 0xd360fd49; (* lsr x9, x10, #32 *) + 0xda090143; (* sbc x3, x10, x9 *) + 0xab0c01ec; (* adds x12, x15, x12 *) + 0xba0900a5; (* adcs x5, x5, x9 *) + 0xba1100ee; (* adcs x14, x7, x17 *) + 0x9a1f0062; (* adc x2, x3, xzr *) + 0xab0e01ae; (* adds x14, x13, x14 *) + 0xba0200c6; (* adcs x6, x6, x2 *) + 0x9a1f0091; (* adc x17, x4, xzr *) + 0x91000627; (* add x7, x17, #0x1 *) + 0xd3607cf0; (* lsl x16, x7, #32 *) + 0xab1000c3; (* adds x3, x6, x16 *) + 0x9a1f0221; (* adc x1, x17, xzr *) + 0xcb0703ef; (* neg x15, x7 *) + 0xd100060d; (* sub x13, x16, #0x1 *) + 0xeb0f0189; (* subs x9, x12, x15 *) + 0xfa0d00a8; (* sbcs x8, x5, x13 *) + 0xfa1f01cf; (* sbcs x15, x14, xzr *) + 0xfa070063; (* sbcs x3, x3, x7 *) + 0xfa070027; (* sbcs x7, x1, x7 *) + 0xab070124; (* adds x4, x9, x7 *) + 0xb2407fe6; (* mov x6, #0xffffffff // #4294967295 *) + 0x8a0700d1; (* and x17, x6, x7 *) + 0xba110108; (* adcs x8, x8, x17 *) + 0xba1f01e5; (* adcs x5, x15, xzr *) + 0xb26083ea; (* mov x10, #0xffffffff00000001 // #-4294967295 *) + 0x8a070141; (* and x1, x10, x7 *) + 0x9a01006c; (* adc x12, x3, x1 *) + 0xa9002004; (* stp x4, x8, [x0] *) + 0xa9013005; (* stp x5, x12, [x0, #16] *) + 0xd65f03c0; (* ret *) +];; + +let bignum_montmul_p256_interm1_mc = + let charlist = List.concat_map + (fun op32 -> + [Char.chr (Int.logand op32 255); + Char.chr (Int.logand (Int.shift_right op32 8) 255); + Char.chr (Int.logand (Int.shift_right op32 16) 255); + Char.chr (Int.logand (Int.shift_right op32 24) 255)]) + bignum_montmul_p256_interm1_ops in + let byte_list = Bytes.init (List.length charlist) (fun i -> List.nth charlist i) in + define_word_list "bignum_montmul_p256_interm1_mc" (term_of_bytes byte_list);; + +let BIGNUM_MONTMUL_P256_INTERM1_EXEC = + ARM_MK_EXEC_RULE bignum_montmul_p256_interm1_mc;; + +let bignum_montmul_p256_interm1_core_mc_def, + bignum_montmul_p256_interm1_core_mc, + BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC = + mk_sublist_of_mc "bignum_montmul_p256_interm1_core_mc" + bignum_montmul_p256_interm1_mc + (`0`,`LENGTH bignum_montmul_p256_interm1_mc - 4`) + BIGNUM_MONTMUL_P256_INTERM1_EXEC;; + +let equiv_input_states = new_definition + `!s1 s1' x y z. + (equiv_input_states:(armstate#armstate)->int64->int64->int64->bool) (s1,s1') x y z <=> + (?a b. + C_ARGUMENTS [z; x; y] s1 /\ + C_ARGUMENTS [z; x; y] s1' /\ + bignum_from_memory (x,4) s1 = a /\ + bignum_from_memory (x,4) s1' = a /\ + bignum_from_memory (y,4) s1 = b /\ + bignum_from_memory (y,4) s1' = b)`;; + +let equiv_output_states = new_definition + `!s1 s1' z. + (equiv_output_states:(armstate#armstate)->int64->bool) (s1,s1') z <=> + (?a. + bignum_from_memory (z,4) s1 = a /\ + bignum_from_memory (z,4) s1' = a)`;; + +let actions = [ + ("equal", 0, 1, 0, 1); + ("insert", 1, 1, 1, 2); + ("equal", 1, 3, 2, 4); + ("insert", 3, 3, 4, 5); + ("equal", 3, 4, 5, 6); + ("replace", 4, 6, 6, 17); + ("equal", 6, 46, 17, 57); + ("replace", 46, 49, 57, 78); + ("equal", 49, 50, 78, 79); + ("replace", 50, 51, 79, 80); + ("equal", 51, 175, 80, 204) +];; + +let equiv_goal1 = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 4)) + [(word pc:int64,LENGTH bignum_montmul_p256_mc); + (word pc2:int64,LENGTH bignum_montmul_p256_interm1_mc)]` + equiv_input_states + equiv_output_states + bignum_montmul_p256_core_mc 0 + `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 4)] ,, + MAYCHANGE SOME_FLAGS` + bignum_montmul_p256_interm1_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)]`;; + + + +let _org_extra_word_CONV = !extra_word_CONV;; +extra_word_CONV := + [GEN_REWRITE_CONV I [WORD_BITMANIP_SIMP_LEMMAS; WORD_MUL64_LO; WORD_MUL64_HI]] + @ (!extra_word_CONV);; + +let BIGNUM_MONTMUL_P256_EQUIV1 = prove(equiv_goal1, + + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; + ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P256_EXEC; + BIGNUM_MONTMUL_P256_CORE_EXEC; + BIGNUM_MONTMUL_P256_INTERM1_EXEC; + BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC] THEN + REPEAT STRIP_TAC THEN + (** Initialize **) + EQUIV_INITIATE_TAC equiv_input_states THEN + REPEAT (FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN + (* necessary to run ldr qs *) + COMBINE_READ_BYTES64_PAIRS_TAC THEN + + (* Start *) + EQUIV_STEPS_TAC actions + BIGNUM_MONTMUL_P256_CORE_EXEC + BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC THEN + + REPEAT_N 2 ENSURES_FINAL_STATE'_TAC THEN + (* Prove remaining clauses from the postcondition *) + ASM_REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Outputs **) + ASM_REWRITE_TAC[equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; + BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,4) state`; + C_ARGUMENTS] THEN + REPEAT (HINT_EXISTS_REFL_TAC THEN ASM_REWRITE_TAC[]); + + (** SUBGOAL 2. Maychange left **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0':armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC; + + (** SUBGOAL 3. Maychange right **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0:armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC + ]);; + +extra_word_CONV := _org_extra_word_CONV;; + + +(****************************************************************************** + The second program equivalence between the intermediate program and + fully optimized program +******************************************************************************) + +let bignum_montmul_p256_neon_mc = + define_from_elf "bignum_montmul_p256_neon_mc" + "arm/p256/bignum_montmul_p256_neon.o";; + +let BIGNUM_MONTMUL_P256_NEON_EXEC = + ARM_MK_EXEC_RULE bignum_montmul_p256_neon_mc;; + +let bignum_montmul_p256_neon_core_mc_def, + bignum_montmul_p256_neon_core_mc, + BIGNUM_MONTMUL_P256_NEON_CORE_EXEC = + mk_sublist_of_mc "bignum_montmul_p256_neon_core_mc" + bignum_montmul_p256_neon_mc + (`0`,`LENGTH bignum_montmul_p256_neon_mc - 4`) + BIGNUM_MONTMUL_P256_NEON_EXEC;; + + +let equiv_goal2 = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 4)) + [(word pc:int64,LENGTH bignum_montmul_p256_interm1_mc); + (word pc2:int64,LENGTH bignum_montmul_p256_neon_mc)]` + equiv_input_states + equiv_output_states + bignum_montmul_p256_interm1_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)]` + bignum_montmul_p256_neon_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)]`;; + +(* Line numbers from bignum_montmul_p256_neon_core_mc (the fully optimized + prog.) to bignum_montmul_p256_interm1_core_mc (the intermediate prog.) *) +let inst_map = [ + 5;1;2;4;3;10;26;28;27;12;20;9;100;8;101;102;86;13;18;11;87;14;88;29;104;15;30;33;103;7;105;31;106;107;61;16;17;59;32;34;19;21;60;22;23;24;6;25;36;37;35;38;40;39;108;109;42;110;89;93;90;41;114;43;44;45;91;46;112;47;111;92;113;115;48;49;50;51;52;53;126;63;54;64;55;62;57;58;124;66;134;65;136;67;125;68;135;69;137;70;128;72;138;71;141;74;127;139;95;129;56;73;130;140;131;132;142;75;133;144;77;145;143;146;76;80;78;147;79;81;117;82;83;94;84;85;96;120;97;98;99;118;119;116;121;149;122;123;148;151;150;153;152;154;155;196;156;167;157;158;159;160;161;162;163;165;164;166;168;169;170;173;171;175;172;174;176;177;178;179;180;181;182;183;184;188;185;186;189;187;190;191;192;193;194;200;195;197;198;201;203;199;202;204 +];; + +(* (state number, (equation, fresh var)) *) +let state_to_abbrevs: (int * thm) list ref = ref [];; + +let BIGNUM_MONTMUL_P256_EQUIV2 = prove( + equiv_goal2, + + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; + ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P256_INTERM1_EXEC; + BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC; + BIGNUM_MONTMUL_P256_NEON_EXEC; + BIGNUM_MONTMUL_P256_NEON_CORE_EXEC] THEN + REPEAT STRIP_TAC THEN + (** Initialize **) + EQUIV_INITIATE_TAC equiv_input_states THEN + REPEAT (FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN + (* necessary to run ldr qs *) + COMBINE_READ_BYTES64_PAIRS_TAC THEN + + (* Left *) + ARM_STEPS'_AND_ABBREV_TAC BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC + (1--204) state_to_abbrevs THEN + + (* Right *) + ARM_STEPS'_AND_REWRITE_TAC BIGNUM_MONTMUL_P256_NEON_CORE_EXEC + (1--204) inst_map state_to_abbrevs THEN + + REPEAT_N 2 ENSURES_FINAL_STATE'_TAC THEN + (* Prove remaining clauses from the postcondition *) + ASM_REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Outputs **) + ASM_REWRITE_TAC[equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; + BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,4) state`; + C_ARGUMENTS] THEN + REPEAT (HINT_EXISTS_REFL_TAC THEN ASM_REWRITE_TAC[]); + + (** SUBGOAL 2. Maychange left **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0':armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC; + + (** SUBGOAL 3. Maychange right **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0:armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC + ]);; + + +(****************************************************************************** + Use transitivity of two program equivalences to prove end-to-end + correctness +******************************************************************************) + +let equiv_goal = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 4)) + [(word pc:int64,LENGTH bignum_montmul_p256_mc); + (word pc2:int64,LENGTH bignum_montmul_p256_neon_mc)]` + equiv_input_states + equiv_output_states + bignum_montmul_p256_core_mc 0 + `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 4)] ,, + MAYCHANGE SOME_FLAGS` + bignum_montmul_p256_neon_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)]`;; + +let equiv_output_states_TRANS = prove( + `!s s2 s' + z. equiv_output_states (s,s') z /\ equiv_output_states (s',s2) z + ==> equiv_output_states (s,s2) z`, + MESON_TAC[equiv_output_states]);; + +let BIGNUM_MONTMUL_P256_EQUIV = prove(equiv_goal, + + REPEAT STRIP_TAC THEN + SUBGOAL_THEN + `?pc3. + ALL (nonoverlapping (z,8 * 4)) + [word pc:int64, LENGTH bignum_montmul_p256_mc; + word pc3:int64, LENGTH bignum_montmul_p256_interm1_mc] /\ + ALL (nonoverlapping (z,8 * 4)) + [word pc3:int64, LENGTH bignum_montmul_p256_interm1_mc; + word pc2:int64, LENGTH bignum_montmul_p256_neon_mc] /\ + ALL (nonoverlapping + (word pc3:int64, LENGTH bignum_montmul_p256_interm1_mc)) + [x,8 * 4; y,8 * 4] /\ + 4 divides val (word pc3:int64)` + MP_TAC THENL [ + FIRST_X_ASSUM MP_TAC THEN + CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + ASM_REWRITE_TAC + [ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P256_INTERM1_EXEC;BIGNUM_MONTMUL_P256_NEON_EXEC; + BIGNUM_MONTMUL_P256_EXEC;GSYM CONJ_ASSOC] THEN + STRIP_TAC THEN + ASM_REWRITE_TAC[] THEN + FIND_HOLE_TAC; + + ALL_TAC + ] THEN + STRIP_TAC THEN + + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTMUL_P256_EQUIV1 th))) THEN + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTMUL_P256_EQUIV2 th))) THEN + FIRST_X_ASSUM (fun c1 -> + FIRST_X_ASSUM (fun c2 -> + MP_TAC (REWRITE_RULE [] (MATCH_MP ENSURES2_CONJ2 (CONJ c1 c2))) + )) THEN + + (* break 'ALL nonoverlapping' in assumptions *) + RULE_ASSUM_TAC (REWRITE_RULE[ + ALLPAIRS;ALL; + BIGNUM_MONTMUL_P256_EXEC; + BIGNUM_MONTMUL_P256_NEON_EXEC; + BIGNUM_MONTMUL_P256_INTERM1_EXEC; + NONOVERLAPPING_CLAUSES]) THEN + SPLIT_FIRST_CONJ_ASSUM_TAC THEN + + MATCH_MP_TAC ENSURES2_WEAKEN THEN + REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + REPEAT STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[TAUT `(p /\ q /\ r) /\ p /\ q /\ r' <=> p /\ q /\ r /\ r'`] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc3,LENGTH bignum_montmul_p256_interm1_core_mc)) + bignum_montmul_p256_interm1_core_mc + (write PC (word pc3) s')` THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC THENL [ + UNDISCH_TAC `equiv_input_states (s,s') x y z` THEN + REWRITE_TAC[equiv_input_states;C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES;BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + MAP_EVERY EXISTS_TAC [`a:num`;`b:num`] THEN + REWRITE_TAC[] THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC; + + UNDISCH_TAC `equiv_input_states (s,s') x y z` THEN + REWRITE_TAC[equiv_input_states;C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES;BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + MAP_EVERY EXISTS_TAC [`a:num`;`b:num`] THEN + REWRITE_TAC[] THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC + ]; + + REPEAT GEN_TAC THEN STRIP_TAC THEN + ASM_REWRITE_TAC[] THEN ASM_MESON_TAC[equiv_output_states_TRANS]; + + SUBSUMED_MAYCHANGE_TAC + ]);; + + + + +let event_n_at_pc_goal = mk_eventually_n_at_pc_statement + `nonoverlapping (word pc:int64,LENGTH bignum_montmul_p256_mc) + (z:int64,8 * 4)` + [`z:int64`;`x:int64`;`y:int64`] (*pc_mc_ofs*)0 bignum_montmul_p256_core_mc (*pc_ofs*)0 + `\s0. C_ARGUMENTS [z;x;y] s0`;; + +let BIGNUM_MONTMUL_P256_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, + + REWRITE_TAC[LENGTH_APPEND;BIGNUM_MONTMUL_P256_CORE_EXEC; + BIGNUM_MONTMUL_P256_EXEC;BARRIER_INST_BYTES_LENGTH] THEN + REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN + SUBGOAL_THEN `4 divides (LENGTH bignum_montmul_p256_core_mc)` + (fun th -> REWRITE_TAC[MATCH_MP aligned_bytes_loaded_append th; + BIGNUM_MONTMUL_P256_CORE_EXEC]) THENL [ + REWRITE_TAC[BIGNUM_MONTMUL_P256_CORE_EXEC] THEN CONV_TAC NUM_DIVIDES_CONV; + ALL_TAC] THEN + REPEAT_N 4 GEN_TAC THEN + STRIP_TAC THEN + (* now start..! *) + X_GEN_TAC `s0:armstate` THEN GEN_TAC THEN STRIP_TAC THEN + (* eventually ==> eventually_n *) + PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC BIGNUM_MONTMUL_P256_CORE_EXEC);; + + +(****************************************************************************** + Inducing BIGNUM_MONTMUL_P256_NEON_SUBROUTINE_CORRECT + from BIGNUM_MONTMUL_P256_CORE_CORRECT +******************************************************************************) + +let BIGNUM_MONTMUL_P256_CORE_CORRECT_N = + prove_correct_n + BIGNUM_MONTMUL_P256_EXEC + BIGNUM_MONTMUL_P256_CORE_EXEC + BIGNUM_MONTMUL_P256_CORE_CORRECT + BIGNUM_MONTMUL_P256_EVENTUALLY_N_AT_PC;; + + +let BIGNUM_MONTMUL_P256_NEON_CORE_CORRECT = prove( + `!z x a pc2. + nonoverlapping (word pc2,LENGTH bignum_montmul_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc2) bignum_montmul_p256_neon_core_mc /\ + read PC s = word pc2 /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,4) s = a /\ + bignum_from_memory (y,4) s = b) + (\s. read PC s = word (pc2 + 816) /\ + (a * b <= 2 EXP 256 * p_256 + ==> bignum_from_memory (z,4) s = + (inverse_mod p_256 (2 EXP 256) * a * b) MOD p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + + REPEAT GEN_TAC THEN + (* Prepare pc for the original program. *) + SUBGOAL_THEN + `?pc. + nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (z:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (x:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (y:int64,8 * 4) /\ + 4 divides val (word pc:int64)` MP_TAC THENL [ + REWRITE_TAC[BIGNUM_MONTMUL_P256_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN + CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + FIND_HOLE_TAC; + + (** SUBGOAL 2 **) + ALL_TAC + ] THEN + + REPEAT_N 2 STRIP_TAC THEN + + VCGEN_EQUIV_TAC BIGNUM_MONTMUL_P256_EQUIV BIGNUM_MONTMUL_P256_CORE_CORRECT_N + [BIGNUM_MONTMUL_P256_EXEC;NONOVERLAPPING_CLAUSES] THEN + + (* unfold definitions that may block tactics *) + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTMUL_P256_EXEC; + BIGNUM_MONTMUL_P256_NEON_EXEC]) THEN + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Precond **) + X_GEN_TAC `s2:armstate` THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `4 divides val (word pc2:int64)` ASSUME_TAC THENL + [ FIRST_ASSUM (fun th -> + MP_TAC th THEN REWRITE_TAC[DIVIDES_4_VAL_WORD_64;aligned_bytes_loaded_word] + THEN METIS_TAC[]) THEN NO_TAC; ALL_TAC ] THEN + ASM_REWRITE_TAC[equiv_input_states] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc,LENGTH (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes))) + (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes) + (write PC (word pc) s2)` THEN + (* Expand variables appearing in the equiv relation *) + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC) THEN + (* Now has only one subgoal: the equivalence! *) + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + MAP_EVERY EXISTS_TAC [`a:num`;`b:num`] THEN + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC); + + (** SUBGOAL 2. Postcond **) + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; + + (** SUBGOAL 3. Frame **) + MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] + ]);; + +let BIGNUM_MONTMUL_P256_NEON_CORRECT = time prove + (`!z x y a b pc. + nonoverlapping (word pc,LENGTH bignum_montmul_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_neon_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,4) s = a /\ + bignum_from_memory (y,4) s = b) + (\s. read PC s = word (pc + 816) /\ + (a * b <= 2 EXP 256 * p_256 + ==> bignum_from_memory (z,4) s = + (inverse_mod p_256 (2 EXP 256) * a * b) MOD p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + REPEAT STRIP_TAC THEN + FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_MONTMUL_P256_NEON_CORE_CORRECT th)) THEN + REWRITE_TAC[ensures] THEN + DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN + EXISTS_TAC `x:int64` THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_montmul_p256_neon_core_mc_def;BIGNUM_MONTMUL_P256_NEON_EXEC; + WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN + CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN + ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN + ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + +let BIGNUM_MONTMUL_P256_NEON_SUBROUTINE_CORRECT = time prove + (`!z x y a b pc returnaddress. + nonoverlapping (word pc,LENGTH bignum_montmul_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_neon_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,4) s = a /\ + bignum_from_memory (y,4) s = b) + (\s. read PC s = returnaddress /\ + (a * b <= 2 EXP 256 * p_256 + ==> bignum_from_memory (z,4) s = + (inverse_mod p_256 (2 EXP 256) * a * b) MOD p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + REWRITE_TAC[BIGNUM_MONTMUL_P256_NEON_EXEC] THEN + ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTMUL_P256_NEON_EXEC + (REWRITE_RULE[BIGNUM_MONTMUL_P256_NEON_EXEC] BIGNUM_MONTMUL_P256_NEON_CORRECT));; + + +(****************************************************************************** + Inducing BIGNUM_AMONTMUL_P256_NEON_SUBROUTINE_CORRECT + from BIGNUM_AMONTMUL_P256_CORE_CORRECT +******************************************************************************) + +let BIGNUM_AMONTMUL_P256_CORE_CORRECT_N = + prove_correct_n + BIGNUM_MONTMUL_P256_EXEC + BIGNUM_MONTMUL_P256_CORE_EXEC + BIGNUM_AMONTMUL_P256_CORE_CORRECT + BIGNUM_MONTMUL_P256_EVENTUALLY_N_AT_PC;; + +let BIGNUM_AMONTMUL_P256_NEON_CORE_CORRECT = prove( + `!z x y a b pc2. + nonoverlapping (word pc2,LENGTH bignum_montmul_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc2) bignum_montmul_p256_neon_core_mc /\ + read PC s = word pc2 /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,4) s = a /\ + bignum_from_memory (y,4) s = b) + (\s. read PC s = word (pc2 + 816) /\ + (bignum_from_memory (z,4) s == + inverse_mod p_256 (2 EXP 256) * a * b) (mod p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + + REPEAT GEN_TAC THEN + (* Prepare pc for the original program. *) + SUBGOAL_THEN + `?pc. + nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (z:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (x:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (y:int64,8 * 4) /\ + 4 divides val (word pc:int64)` MP_TAC THENL [ + REWRITE_TAC[BIGNUM_MONTMUL_P256_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN + CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + FIND_HOLE_TAC; + + (** SUBGOAL 2 **) + ALL_TAC + ] THEN + + REPEAT_N 2 STRIP_TAC THEN + + VCGEN_EQUIV_TAC BIGNUM_MONTMUL_P256_EQUIV BIGNUM_AMONTMUL_P256_CORE_CORRECT_N + [BIGNUM_MONTMUL_P256_EXEC;NONOVERLAPPING_CLAUSES] THEN + + (* unfold definitions that may block tactics *) + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTMUL_P256_EXEC; + BIGNUM_MONTMUL_P256_NEON_EXEC]) THEN + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Precond **) + X_GEN_TAC `s2:armstate` THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `4 divides val (word pc2:int64)` ASSUME_TAC THENL + [ FIRST_ASSUM (fun th -> + MP_TAC th THEN REWRITE_TAC[DIVIDES_4_VAL_WORD_64;aligned_bytes_loaded_word] + THEN METIS_TAC[]) THEN NO_TAC; ALL_TAC ] THEN + ASM_REWRITE_TAC[equiv_input_states] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc,LENGTH (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes))) + (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes) + (write PC (word pc) s2)` THEN + (* Expand variables appearing in the equiv relation *) + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC) THEN + (* Now has only one subgoal: the equivalence! *) + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + MAP_EVERY EXISTS_TAC [`a:num`;`b:num`] THEN + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC); + + (** SUBGOAL 2. Postcond **) + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; + + (** SUBGOAL 3. Frame **) + MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] + ]);; + +let BIGNUM_AMONTMUL_P256_NEON_CORRECT = time prove + (`!z x y a b pc. + nonoverlapping (word pc,LENGTH bignum_montmul_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_neon_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,4) s = a /\ + bignum_from_memory (y,4) s = b) + (\s. read PC s = word (pc + 816) /\ + (bignum_from_memory (z,4) s == + inverse_mod p_256 (2 EXP 256) * a * b) (mod p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + REPEAT STRIP_TAC THEN + FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_AMONTMUL_P256_NEON_CORE_CORRECT th)) THEN + REWRITE_TAC[ensures] THEN + DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN + MAP_EVERY EXISTS_TAC [`x:int64`;`y:int64`] THEN + ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_montmul_p256_neon_core_mc_def;BIGNUM_MONTMUL_P256_NEON_EXEC; + WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN + CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN + ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN + ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + +let BIGNUM_AMONTMUL_P256_NEON_SUBROUTINE_CORRECT = time prove + (`!z x y a b pc returnaddress. + nonoverlapping (word pc,LENGTH bignum_montmul_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_neon_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,4) s = a /\ + bignum_from_memory (y,4) s = b) + (\s. read PC s = returnaddress /\ + (bignum_from_memory (z,4) s == + inverse_mod p_256 (2 EXP 256) * a * b) (mod p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + REWRITE_TAC[BIGNUM_MONTMUL_P256_NEON_EXEC] THEN + ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTMUL_P256_NEON_EXEC + (REWRITE_RULE[BIGNUM_MONTMUL_P256_NEON_EXEC] BIGNUM_AMONTMUL_P256_NEON_CORRECT));; + diff --git a/arm/proofs/bignum_montsqr_p256.ml b/arm/proofs/bignum_montsqr_p256.ml index 9b8a234a..c4be0cc7 100644 --- a/arm/proofs/bignum_montsqr_p256.ml +++ b/arm/proofs/bignum_montsqr_p256.ml @@ -142,6 +142,15 @@ let bignum_montsqr_p256_mc = let BIGNUM_MONTSQR_P256_EXEC = ARM_MK_EXEC_RULE bignum_montsqr_p256_mc;; +(* bignum_montsqr_p256_mc without ret. *) +let bignum_montsqr_p256_core_mc_def, + bignum_montsqr_p256_core_mc, + BIGNUM_MONTSQR_P256_CORE_EXEC = + mk_sublist_of_mc "bignum_montsqr_p256_core_mc" + bignum_montsqr_p256_mc + (`0`,`LENGTH bignum_montsqr_p256_mc - 4`) + BIGNUM_MONTSQR_P256_EXEC;; + (* ------------------------------------------------------------------------- *) (* Proof. *) (* ------------------------------------------------------------------------- *) @@ -192,11 +201,11 @@ let lemma2 = prove ASM_SIMP_TAC[VAL_WORD_SUB_CASES; GSYM REAL_OF_NUM_SUB] THEN REAL_ARITH_TAC);; -let BIGNUM_MONTSQR_P256_CORRECT = time prove +let BIGNUM_MONTSQR_P256_CORE_CORRECT = time prove (`!z x a pc. nonoverlapping (word pc,0x1f4) (z,8 * 4) ==> ensures arm - (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_mc /\ + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_core_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,4) s = a) @@ -216,13 +225,13 @@ let BIGNUM_MONTSQR_P256_CORRECT = time prove (*** Globalize the a EXP 2 <= 2 EXP 256 * p_256 assumption ***) ASM_CASES_TAC `a EXP 2 <= 2 EXP 256 * p_256` THENL - [ASM_REWRITE_TAC[]; ARM_SIM_TAC BIGNUM_MONTSQR_P256_EXEC (1--124)] THEN + [ASM_REWRITE_TAC[]; ARM_SIM_TAC BIGNUM_MONTSQR_P256_CORE_EXEC (1--124)] THEN ENSURES_INIT_TAC "s0" THEN BIGNUM_DIGITIZE_TAC "x_" `bignum_from_memory (x,4) s0` THEN (*** Squaring and Montgomery reduction of lower half ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC [7;9;14;16;18;19;20;21;22;23;24; 25;26;27;28;29;30;31;32;33;34;35;36;37;38;39;40] (1--40) THEN @@ -246,7 +255,7 @@ let BIGNUM_MONTSQR_P256_CORRECT = time prove (*** ADK cross-product ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC [41;42;49;54;55;57;58;59;60;61;63;64;65] (41--65) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES]) THEN SUBGOAL_THEN @@ -275,7 +284,7 @@ let BIGNUM_MONTSQR_P256_CORRECT = time prove (*** Remaining Montgomery reduction and addition of remaining high terms ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_EXEC (66--111) (66--111) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC (66--111) (66--111) THEN ABBREV_TAC `t = bignum_of_wordlist [sum_s95; sum_s108; sum_s109; sum_s110; sum_s111]` THEN @@ -320,7 +329,7 @@ let BIGNUM_MONTSQR_P256_CORRECT = time prove (*** Final correction stage ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC [113;114;116;117;118] (112--124) THEN RULE_ASSUM_TAC(REWRITE_RULE [GSYM WORD_BITVAL; COND_SWAP; REAL_BITVAL_NOT]) THEN @@ -359,6 +368,33 @@ let BIGNUM_MONTSQR_P256_CORRECT = time prove ASM_REWRITE_TAC[BITVAL_CLAUSES; VAL_WORD_BITVAL] THEN DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC);; +let BIGNUM_MONTSQR_P256_CORRECT = time prove + (`!z x a pc. + nonoverlapping (word pc,0x1f4) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,4) s = a) + (\s. read PC s = word (pc + 0x1f0) /\ + (a EXP 2 <= 2 EXP 256 * p_256 + ==> bignum_from_memory (z,4) s = + (inverse_mod p_256 (2 EXP 256) * a EXP 2) MOD p_256)) + (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 4)] ,, + MAYCHANGE SOME_FLAGS)`, + REPEAT STRIP_TAC THEN + FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_MONTSQR_P256_CORE_CORRECT th)) THEN + REWRITE_TAC[ensures] THEN + DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN + EXISTS_TAC `x:int64` THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_montsqr_p256_core_mc_def;BIGNUM_MONTSQR_P256_EXEC; + WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN + CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN + ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN + ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + let BIGNUM_MONTSQR_P256_SUBROUTINE_CORRECT = time prove (`!z x a pc returnaddress. nonoverlapping (word pc,0x1f4) (z,8 * 4) @@ -383,11 +419,11 @@ let BIGNUM_MONTSQR_P256_SUBROUTINE_CORRECT = time prove (* But the output, still 256 bits, may then not be fully reduced mod p_256. *) (* ------------------------------------------------------------------------- *) -let BIGNUM_AMONTSQR_P256_CORRECT = time prove +let BIGNUM_AMONTSQR_P256_CORE_CORRECT = time prove (`!z x a pc. nonoverlapping (word pc,0x1f4) (z,8 * 4) ==> ensures arm - (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_mc /\ + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_core_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,4) s = a) @@ -407,7 +443,7 @@ let BIGNUM_AMONTSQR_P256_CORRECT = time prove (*** Squaring and Montgomery reduction of lower half ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC [7;9;14;16;18;19;20;21;22;23;24; 25;26;27;28;29;30;31;32;33;34;35;36;37;38;39;40] (1--40) THEN @@ -431,7 +467,7 @@ let BIGNUM_AMONTSQR_P256_CORRECT = time prove (*** ADK cross-product ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC [41;42;49;54;55;57;58;59;60;61;63;64;65] (41--65) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES]) THEN SUBGOAL_THEN @@ -460,7 +496,7 @@ let BIGNUM_AMONTSQR_P256_CORRECT = time prove (*** Remaining Montgomery reduction and addition of remaining high terms ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_EXEC (66--111) (66--111) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC (66--111) (66--111) THEN ABBREV_TAC `t = bignum_of_wordlist [sum_s95; sum_s108; sum_s109; sum_s110; sum_s111]` THEN @@ -503,7 +539,7 @@ let BIGNUM_AMONTSQR_P256_CORRECT = time prove (*** Final correction ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC [113;114;116;117;118] (112--124) THEN RULE_ASSUM_TAC(REWRITE_RULE [GSYM WORD_BITVAL; COND_SWAP; REAL_BITVAL_NOT]) THEN @@ -547,6 +583,32 @@ let BIGNUM_AMONTSQR_P256_CORRECT = time prove REWRITE_TAC[BITVAL_CLAUSES; p_256] THEN CONV_TAC WORD_REDUCE_CONV THEN REAL_INTEGER_TAC);; +let BIGNUM_AMONTSQR_P256_CORRECT = time prove + (`!z x a pc. + nonoverlapping (word pc,0x1f4) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,4) s = a) + (\s. read PC s = word (pc + 0x1f0) /\ + (bignum_from_memory (z,4) s == + inverse_mod p_256 (2 EXP 256) * a EXP 2) (mod p_256)) + (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 4)] ,, + MAYCHANGE SOME_FLAGS)`, + REPEAT STRIP_TAC THEN + FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_AMONTSQR_P256_CORE_CORRECT th)) THEN + REWRITE_TAC[ensures] THEN + DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN + EXISTS_TAC `x:int64` THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_montsqr_p256_core_mc_def;BIGNUM_MONTSQR_P256_EXEC; + WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN + CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN + ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN + ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + let BIGNUM_AMONTSQR_P256_SUBROUTINE_CORRECT = time prove (`!z x a pc returnaddress. nonoverlapping (word pc,0x1f4) (z,8 * 4) diff --git a/arm/proofs/bignum_montsqr_p256_neon.ml b/arm/proofs/bignum_montsqr_p256_neon.ml new file mode 100644 index 00000000..e452aee9 --- /dev/null +++ b/arm/proofs/bignum_montsqr_p256_neon.ml @@ -0,0 +1,874 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(****************************************************************************** + The first program equivalence between the source program and + its SIMD-vectorized but not instruction-unscheduled program +******************************************************************************) + +needs "arm/proofs/bignum_montsqr_p256.ml";; +needs "arm/proofs/equiv.ml";; +needs "arm/proofs/neon_helper.ml";; + +(* This is the intermediate program that is equivalent to both + bignum_montsqr_p256 and bignum_montsqr_p256_neon. This is a vectorized + version of bignum_montsqr_p256 but instructions are unscheduled. *) + +let bignum_montsqr_p256_interm1_ops:int list = [ + 0xa9400c27; (* ldp x7, x3, [x1] *) + 0x3dc00026; (* ldr q6, [x1] *) + 0xa9412029; (* ldp x9, x8, [x1, #16] *) + 0x3dc00432; (* ldr q18, [x1, #16] *) + 0x3dc0003b; (* ldr q27, [x1] *) + 0x2ebbc370; (* umull v16.2d, v27.2s, v27.2s *) + 0x6ebbc371; (* umull2 v17.2d, v27.4s, v27.4s *) + 0x0ea12b7e; (* xtn v30.2s, v27.2d *) + 0x4e9b5b7b; (* uzp2 v27.4s, v27.4s, v27.4s *) + 0x2ebec37b; (* umull v27.2d, v27.2s, v30.2s *) + 0x4e083e06; (* mov x6, v16.d[0] *) + 0x4e183e0c; (* mov x12, v16.d[1] *) + 0x4e083e2d; (* mov x13, v17.d[0] *) + 0x4e183e21; (* mov x1, v17.d[1] *) + 0x4e083f6f; (* mov x15, v27.d[0] *) + 0x4e183f6a; (* mov x10, v27.d[1] *) + 0xab0f84c4; (* adds x4, x6, x15, lsl #33 *) + 0xd35ffde6; (* lsr x6, x15, #31 *) + 0x9a06018f; (* adc x15, x12, x6 *) + 0xab0a85ad; (* adds x13, x13, x10, lsl #33 *) + 0xd35ffd46; (* lsr x6, x10, #31 *) + 0x9a06002c; (* adc x12, x1, x6 *) + 0x9b037ce6; (* mul x6, x7, x3 *) + 0x9bc37ce1; (* umulh x1, x7, x3 *) + 0xab0605e5; (* adds x5, x15, x6, lsl #1 *) + 0x93c6fc26; (* extr x6, x1, x6, #63 *) + 0xba0601aa; (* adcs x10, x13, x6 *) + 0xd37ffc26; (* lsr x6, x1, #63 *) + 0x9a06018f; (* adc x15, x12, x6 *) + 0xd3607c86; (* lsl x6, x4, #32 *) + 0xeb06008d; (* subs x13, x4, x6 *) + 0xd360fc8c; (* lsr x12, x4, #32 *) + 0xda0c0081; (* sbc x1, x4, x12 *) + 0xab0600a6; (* adds x6, x5, x6 *) + 0xba0c0145; (* adcs x5, x10, x12 *) + 0xba0d01ea; (* adcs x10, x15, x13 *) + 0x9a1f002f; (* adc x15, x1, xzr *) + 0xd3607ccd; (* lsl x13, x6, #32 *) + 0xeb0d00cc; (* subs x12, x6, x13 *) + 0xd360fcc1; (* lsr x1, x6, #32 *) + 0xda0100c6; (* sbc x6, x6, x1 *) + 0xab0d00b0; (* adds x16, x5, x13 *) + 0xba01014b; (* adcs x11, x10, x1 *) + 0xba0c01e2; (* adcs x2, x15, x12 *) + 0x9a1f00d1; (* adc x17, x6, xzr *) + 0x4e861a5e; (* uzp1 v30.4s, v18.4s, v6.4s *) + 0x4ea00a5b; (* rev64 v27.4s, v18.4s *) + 0x4e8618d2; (* uzp1 v18.4s, v6.4s, v6.4s *) + 0x4ea69f7b; (* mul v27.4s, v27.4s, v6.4s *) + 0x6ea02b65; (* uaddlp v5.2d, v27.4s *) + 0x4f6054a6; (* shl v6.2d, v5.2d, #32 *) + 0x2ebe8246; (* umlal v6.2d, v18.2s, v30.2s *) + 0x4e083cc4; (* mov x4, v6.d[0] *) + 0x4e183cc5; (* mov x5, v6.d[1] *) + 0x9bc97cea; (* umulh x10, x7, x9 *) + 0xeb0300e6; (* subs x6, x7, x3 *) + 0xda8624cd; (* cneg x13, x6, cc // cc = lo, ul, last *) + 0xda9f23ec; (* csetm x12, cc // cc = lo, ul, last *) + 0xeb090106; (* subs x6, x8, x9 *) + 0xda8624c6; (* cneg x6, x6, cc // cc = lo, ul, last *) + 0x9b067da1; (* mul x1, x13, x6 *) + 0x9bc67da6; (* umulh x6, x13, x6 *) + 0xda8c218f; (* cinv x15, x12, cc // cc = lo, ul, last *) + 0xca0f002c; (* eor x12, x1, x15 *) + 0xca0f00cd; (* eor x13, x6, x15 *) + 0xab0a0081; (* adds x1, x4, x10 *) + 0x9a1f0146; (* adc x6, x10, xzr *) + 0x9bc87c63; (* umulh x3, x3, x8 *) + 0xab050021; (* adds x1, x1, x5 *) + 0xba0300c6; (* adcs x6, x6, x3 *) + 0x9a1f0063; (* adc x3, x3, xzr *) + 0xab0500c6; (* adds x6, x6, x5 *) + 0x9a1f0063; (* adc x3, x3, xzr *) + 0xb10005ff; (* cmn x15, #0x1 *) + 0xba0c002c; (* adcs x12, x1, x12 *) + 0xba0d00c1; (* adcs x1, x6, x13 *) + 0x9a0f0063; (* adc x3, x3, x15 *) + 0xab040086; (* adds x6, x4, x4 *) + 0xba0c018d; (* adcs x13, x12, x12 *) + 0xba01002c; (* adcs x12, x1, x1 *) + 0xba030061; (* adcs x1, x3, x3 *) + 0x9a1f03e3; (* adc x3, xzr, xzr *) + 0xab1000c6; (* adds x6, x6, x16 *) + 0xba0b01a5; (* adcs x5, x13, x11 *) + 0xba02018a; (* adcs x10, x12, x2 *) + 0xba11002f; (* adcs x15, x1, x17 *) + 0x9a1f006d; (* adc x13, x3, xzr *) + 0xd3607cc3; (* lsl x3, x6, #32 *) + 0xeb0300cc; (* subs x12, x6, x3 *) + 0xd360fcc1; (* lsr x1, x6, #32 *) + 0xda0100c6; (* sbc x6, x6, x1 *) + 0xab0300a3; (* adds x3, x5, x3 *) + 0xba010145; (* adcs x5, x10, x1 *) + 0xba0c01ef; (* adcs x15, x15, x12 *) + 0xba0601ad; (* adcs x13, x13, x6 *) + 0x9a1f03ea; (* adc x10, xzr, xzr *) + 0xd3607c66; (* lsl x6, x3, #32 *) + 0xeb06006c; (* subs x12, x3, x6 *) + 0xd360fc61; (* lsr x1, x3, #32 *) + 0xda010063; (* sbc x3, x3, x1 *) + 0xab0600a6; (* adds x6, x5, x6 *) + 0xba0101ef; (* adcs x15, x15, x1 *) + 0xba0c01ad; (* adcs x13, x13, x12 *) + 0xba03014c; (* adcs x12, x10, x3 *) + 0x9a1f03e1; (* adc x1, xzr, xzr *) + 0x9b097d23; (* mul x3, x9, x9 *) + 0xab0300c5; (* adds x5, x6, x3 *) + 0x9b087d06; (* mul x6, x8, x8 *) + 0x9bc97d23; (* umulh x3, x9, x9 *) + 0xba0301ef; (* adcs x15, x15, x3 *) + 0xba0601ad; (* adcs x13, x13, x6 *) + 0x9bc87d03; (* umulh x3, x8, x8 *) + 0xba03018c; (* adcs x12, x12, x3 *) + 0x9a1f0021; (* adc x1, x1, xzr *) + 0x9b087d26; (* mul x6, x9, x8 *) + 0x9bc87d23; (* umulh x3, x9, x8 *) + 0xab0600c8; (* adds x8, x6, x6 *) + 0xba030069; (* adcs x9, x3, x3 *) + 0x9a1f03e3; (* adc x3, xzr, xzr *) + 0xab0801ea; (* adds x10, x15, x8 *) + 0xba0901af; (* adcs x15, x13, x9 *) + 0xba03018d; (* adcs x13, x12, x3 *) + 0xba1f002c; (* adcs x12, x1, xzr *) + 0xb2407fe3; (* mov x3, #0xffffffff // #4294967295 *) + 0xb10004a6; (* adds x6, x5, #0x1 *) + 0xfa030148; (* sbcs x8, x10, x3 *) + 0xb26083e3; (* mov x3, #0xffffffff00000001 // #-4294967295 *) + 0xfa1f01e9; (* sbcs x9, x15, xzr *) + 0xfa0301a1; (* sbcs x1, x13, x3 *) + 0xfa1f019f; (* sbcs xzr, x12, xzr *) + 0x9a8520c6; (* csel x6, x6, x5, cs // cs = hs, nlast *) + 0x9a8a2108; (* csel x8, x8, x10, cs // cs = hs, nlast *) + 0x9a8f2129; (* csel x9, x9, x15, cs // cs = hs, nlast *) + 0x9a8d2023; (* csel x3, x1, x13, cs // cs = hs, nlast *) + 0xa9002006; (* stp x6, x8, [x0] *) + 0xa9010c09; (* stp x9, x3, [x0, #16] *) + 0xd65f03c0; (* ret *) +];; + +let bignum_montsqr_p256_interm1_mc = + let charlist = List.concat_map + (fun op32 -> + [Char.chr (Int.logand op32 255); + Char.chr (Int.logand (Int.shift_right op32 8) 255); + Char.chr (Int.logand (Int.shift_right op32 16) 255); + Char.chr (Int.logand (Int.shift_right op32 24) 255)]) + bignum_montsqr_p256_interm1_ops in + let byte_list = Bytes.init (List.length charlist) (fun i -> List.nth charlist i) in + define_word_list "bignum_montsqr_p256_interm1_mc" (term_of_bytes byte_list);; + +let BIGNUM_MONTSQR_P256_INTERM1_EXEC = + ARM_MK_EXEC_RULE bignum_montsqr_p256_interm1_mc;; + +let bignum_montsqr_p256_interm1_core_mc_def, + bignum_montsqr_p256_interm1_core_mc, + BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC = + mk_sublist_of_mc "bignum_montsqr_p256_interm1_core_mc" + bignum_montsqr_p256_interm1_mc + (`0`,`LENGTH bignum_montsqr_p256_interm1_mc - 4`) + BIGNUM_MONTSQR_P256_INTERM1_EXEC;; + + +let equiv_input_states = new_definition + `!s1 s1' x z. + (equiv_input_states:(armstate#armstate)->int64->int64->bool) (s1,s1') x z <=> + (?a. + C_ARGUMENTS [z; x] s1 /\ + C_ARGUMENTS [z; x] s1' /\ + bignum_from_memory (x,4) s1 = a /\ + bignum_from_memory (x,4) s1' = a)`;; + +let equiv_output_states = new_definition + `!s1 s1' z. + (equiv_output_states:(armstate#armstate)->int64->bool) (s1,s1') z <=> + (?a. + bignum_from_memory (z,4) s1 = a /\ + bignum_from_memory (z,4) s1' = a)`;; + +let actions = [ + ("equal", 0, 1, 0, 1); + ("insert", 1, 1, 1, 2); + ("equal", 1, 2, 2, 3); + ("replace", 2, 24, 3, 29); +];; + +let actions2 = [ + ("equal", 24, 40, 29, 45); + ("replace", 40, 42, 45, 54); + ("equal", 42, 124, 54, 136); +];; + +let equiv_goal1 = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 4)) + [(word pc:int64,LENGTH bignum_montsqr_p256_mc); + (word pc2:int64,LENGTH bignum_montsqr_p256_interm1_mc)]` + equiv_input_states + equiv_output_states + bignum_montsqr_p256_core_mc 0 + `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 4)] ,, + MAYCHANGE SOME_FLAGS` + bignum_montsqr_p256_interm1_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)]`;; + + +let lemma1 = prove(`!(x:int64). + (word_mul + (word_zx (word_subword x (0,32):int32):int64) + (word_zx (word_subword x (0,32):int32):int64)) = + word (0 + + val ((word_zx:(64)word->(32)word) x) * + val ((word_zx:(64)word->(32)word) x))`, + CONV_TAC WORD_BLAST);; + +let lemma2 = prove(`!(x:int64). + (word_mul + (word_zx (word_subword x (32,32):int32):int64) + (word_zx (word_subword x (0,32):int32)):int64) = + word (0 + + val ((word_zx:(64)word->(32)word) x) * + val ((word_zx:(64)word->(32)word) (word_ushr x 32)))`, + CONV_TAC WORD_BLAST);; + +let lemma3 = prove (`!(x:int64). word_add x x = word_shl x 1`, + CONV_TAC WORD_BLAST);; + +let lemma4 = prove (`!(x:int64). + (word_mul + ((word_zx:(32)word->(64)word) (word_subword x (32,32))) + ((word_zx:(32)word->(64)word) (word_subword x (32,32)))) = + (word (0 + + val ((word_zx:(64)word->(32)word) (word_ushr x 32)) * + val ((word_zx:(64)word->(32)word) (word_ushr x 32))))`, + CONV_TAC WORD_BLAST);; + +let pth = prove( + `(val (x:int64) * val (y:int64)) MOD 2 EXP 128 = val (x:int64) * val (y:int64)`, + IMP_REWRITE_TAC[MOD_LT] THEN + REWRITE_TAC[ARITH_RULE`2 EXP 128 = 2 EXP 64 * 2 EXP 64`] THEN + MAP_EVERY (fun t -> ASSUME_TAC (SPEC t VAL_BOUND_64)) [`x:int64`;`y:int64`] THEN + IMP_REWRITE_TAC[LT_MULT2]);; + +let lemma5 = prove ( + `!(a'_0:int64) (a'_1:int64). + (word_add + (word_shl + ((word:num->(64)word) ((val a'_0 * val a'_1) DIV 2 EXP 64)) + 1) + ((word:num->(64)word) + (bitval + (2 EXP 64 <= + val + ((word:num->(64)word) (0 + val a'_0 * val a'_1)) + + val + ((word:num->(64)word) (0 + val a'_0 * val a'_1)))))) = + + ((word_subword:(128)word->num#num->(64)word) + ((word_join:(64)word->(64)word->(128)word) + (word ((val a'_0 * val a'_1) DIV 2 EXP 64)) + (word (0 + val a'_0 * val a'_1))) + (63,64))`, + + REPEAT STRIP_TAC THEN + SUBGOAL_THEN `?(a:int128). val a = val (a'_0:int64) * val (a'_1:int64)` MP_TAC THENL [ + EXISTS_TAC `word(val (a'_0:int64) * val (a'_1:int64)):int128` THEN + REWRITE_TAC[VAL_WORD;DIMINDEX_128;pth]; + ALL_TAC + ] THEN + STRIP_TAC THEN FIRST_X_ASSUM (fun th -> REWRITE_TAC[GSYM th]) THEN + REWRITE_TAC[bitval; WORD_BLAST + `val(a:int128) DIV 2 EXP 64 = val(word_ushr a 64)`] THEN + SPEC_TAC (`a:int128`,`a:int128`) THEN + BITBLAST_TAC);; + +let lemma6 = prove( + `!(a'_0:int64) (a'_1:int64). + ((word:num->(64)word) + (bitval + (2 EXP 64 <= + val ((word:num->(64)word) ((val a'_0 * val a'_1) DIV 2 EXP 64)) + + val ((word:num->(64)word) ((val a'_0 * val a'_1) DIV 2 EXP 64)) + + bitval + (2 EXP 64 <= + val ((word:num->(64)word) (0 + val a'_0 * val a'_1)) + + val ((word:num->(64)word) (0 + val a'_0 * val a'_1)))))) = + + (word_ushr + ((word:num->(64)word) ((val a'_0 * val a'_1) DIV 2 EXP 64)) + 63)`, + + REPEAT STRIP_TAC THEN + SUBGOAL_THEN `?(a:int128). val a = val (a'_0:int64) * val (a'_1:int64)` MP_TAC THENL [ + EXISTS_TAC `word(val (a'_0:int64) * val (a'_1:int64)):int128` THEN + REWRITE_TAC[VAL_WORD;DIMINDEX_128;pth]; + ALL_TAC + ] THEN + STRIP_TAC THEN FIRST_X_ASSUM (fun th -> REWRITE_TAC[GSYM th]) THEN + REWRITE_TAC[bitval; WORD_BLAST + `val(a:int128) DIV 2 EXP 64 = val(word_ushr a 64)`] THEN + SPEC_TAC (`a:int128`,`a:int128`) THEN + BITBLAST_TAC);; + + + +let _org_extra_word_CONV = !extra_word_CONV;; +extra_word_CONV := + [GEN_REWRITE_CONV I [WORD_BITMANIP_SIMP_LEMMAS; WORD_SQR64_LO; WORD_SQR64_HI; + WORD_MUL64_LO]] + @ (!extra_word_CONV);; + +let BIGNUM_MONTSQR_P256_EQUIV1 = prove(equiv_goal1, + + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; + ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTSQR_P256_EXEC;BIGNUM_MONTSQR_P256_CORE_EXEC; + BIGNUM_MONTSQR_P256_INTERM1_EXEC; + BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC] THEN + REPEAT STRIP_TAC THEN + (** Initialize **) + EQUIV_INITIATE_TAC equiv_input_states THEN + REPEAT (FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN + (* necessary to run ldr qs *) + COMBINE_READ_BYTES64_PAIRS_TAC THEN + + (* Start *) + EQUIV_STEPS_TAC actions + BIGNUM_MONTSQR_P256_CORE_EXEC + BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC THEN + + (* checker: + (fun (asl,g) -> + let a = List.filter (fun (_,th) -> + let t = concl th in + if not (is_eq t) then false else + let lhs = fst (dest_eq t) in + lhs = `read X15 s24` || lhs = `read X4 s29'`) asl in + if not (snd (dest_eq (concl (snd (List.nth a 0)))) = + snd (dest_eq (concl (snd (List.nth a 1))))) then + failwith ".." + else ALL_TAC (asl,g)) + *) + RULE_ASSUM_TAC(REWRITE_RULE[ + lemma1;lemma2; (* X15 of prog1 = X4 of prog2 *) + lemma3;lemma4; (* X16 of prog1 = X5 of prog2 *) + lemma5; (* X17 of prog1 = X10 of prog2 *) + lemma6; (* X1 of prog1 = X15 of prog2 *) + ]) THEN + + EQUIV_STEPS_TAC actions2 + BIGNUM_MONTSQR_P256_CORE_EXEC + BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC THEN + + REPEAT_N 2 ENSURES_FINAL_STATE'_TAC THEN + (* Prove remaining clauses from the postcondition *) + ASM_REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Outputs **) + ASM_REWRITE_TAC[equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; + BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,4) state`; + C_ARGUMENTS] THEN + REPEAT (HINT_EXISTS_REFL_TAC THEN ASM_REWRITE_TAC[]); + + (** SUBGOAL 2. Maychange left **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0':armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC; + + (** SUBGOAL 3. Maychange right **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0:armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC + ]);; + +extra_word_CONV := _org_extra_word_CONV;; + + +(****************************************************************************** + The second program equivalence between the intermediate program and + fully optimized program +******************************************************************************) + +let bignum_montsqr_p256_neon_mc = + define_from_elf "bignum_montsqr_p256_neon_mc" + "arm/p256/bignum_montsqr_p256_neon.o";; + +let BIGNUM_MONTSQR_P256_NEON_EXEC = + ARM_MK_EXEC_RULE bignum_montsqr_p256_neon_mc;; + +let bignum_montsqr_p256_neon_core_mc_def, + bignum_montsqr_p256_neon_core_mc, + BIGNUM_MONTSQR_P256_NEON_CORE_EXEC = + mk_sublist_of_mc "bignum_montsqr_p256_neon_core_mc" + bignum_montsqr_p256_neon_mc + (`0`,`LENGTH bignum_montsqr_p256_neon_mc - 4`) + BIGNUM_MONTSQR_P256_NEON_EXEC;; + + +let equiv_goal2 = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 4)) + [(word pc:int64,LENGTH bignum_montsqr_p256_interm1_mc); + (word pc2:int64,LENGTH bignum_montsqr_p256_neon_mc)]` + equiv_input_states + equiv_output_states + bignum_montsqr_p256_interm1_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)]` + bignum_montsqr_p256_neon_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)]`;; + +(* Line numbers from bignum_montsqr_p256_neon_core_mc (the fully optimized + prog.) to bignum_montsqr_p256_interm1_core_mc (the intermediate prog.) *) +let inst_map = [ + 5;1;4;2;3;9;8;24;47;6;10;49;56;55;12;7;11;46;50;28;23;15;58;57;14;13;59;16;60;63;17;26;18;48;51;19;20;21;30;52;22;25;32;62;27;29;31;33;34;53;61;35;38;68;36;40;37;39;41;42;54;43;116;44;65;45;66;67;69;64;115;70;71;72;73;74;108;75;76;112;77;78;79;80;81;82;83;106;84;90;85;88;109;86;124;87;117;118;127;119;89;91;92;93;97;99;94;95;96;98;100;101;102;103;104;105;107;110;111;113;114;120;121;122;123;125;126;128;129;130;131;133;134;132;136;135;137 +];; + +(* (state number, (equation, fresh var)) *) +let state_to_abbrevs: (int * thm) list ref = ref [];; + +let BIGNUM_MONTSQR_P256_EQUIV2 = prove( + equiv_goal2, + + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; + ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTSQR_P256_INTERM1_EXEC;BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC; + BIGNUM_MONTSQR_P256_NEON_EXEC; + BIGNUM_MONTSQR_P256_NEON_CORE_EXEC] THEN + REPEAT STRIP_TAC THEN + (** Initialize **) + EQUIV_INITIATE_TAC equiv_input_states THEN + REPEAT (FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN + (* necessary to run ldr qs *) + COMBINE_READ_BYTES64_PAIRS_TAC THEN + + (* Left *) + ARM_STEPS'_AND_ABBREV_TAC BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC (1--136) state_to_abbrevs THEN + + (* Right *) + ARM_STEPS'_AND_REWRITE_TAC BIGNUM_MONTSQR_P256_NEON_CORE_EXEC (1--136) inst_map state_to_abbrevs THEN + + REPEAT_N 2 ENSURES_FINAL_STATE'_TAC THEN + (* Prove remaining clauses from the postcondition *) + ASM_REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Outputs **) + ASM_REWRITE_TAC[equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; + BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,4) state`; + C_ARGUMENTS] THEN + REPEAT (HINT_EXISTS_REFL_TAC THEN ASM_REWRITE_TAC[]); + + (** SUBGOAL 2. Maychange left **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0':armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC; + + (** SUBGOAL 3. Maychange right **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0:armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC + ]);; + + +(****************************************************************************** + Use transitivity of two program equivalences to prove end-to-end + correctness +******************************************************************************) + +let equiv_goal = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 4)) + [(word pc:int64,LENGTH bignum_montsqr_p256_mc); + (word pc2:int64,LENGTH bignum_montsqr_p256_neon_mc)]` + equiv_input_states + equiv_output_states + bignum_montsqr_p256_core_mc 0 + `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 4)] ,, + MAYCHANGE SOME_FLAGS` + bignum_montsqr_p256_neon_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)]`;; + +let equiv_output_states_TRANS = prove( + `!s s2 s' + z. equiv_output_states (s,s') z /\ equiv_output_states (s',s2) z + ==> equiv_output_states (s,s2) z`, + MESON_TAC[equiv_output_states]);; + +let BIGNUM_MONTSQR_P256_EQUIV = prove(equiv_goal, + + REPEAT STRIP_TAC THEN + SUBGOAL_THEN + `?pc3. + ALL (nonoverlapping (z,8 * 4)) + [word pc:int64, LENGTH bignum_montsqr_p256_mc; + word pc3:int64, LENGTH bignum_montsqr_p256_interm1_mc] /\ + ALL (nonoverlapping (z,8 * 4)) + [word pc3:int64, LENGTH bignum_montsqr_p256_interm1_mc; + word pc2:int64, LENGTH bignum_montsqr_p256_neon_mc] /\ + nonoverlapping (x,8 * 4) + (word pc3:int64, LENGTH bignum_montsqr_p256_interm1_mc) /\ + 4 divides val (word pc3:int64)` + MP_TAC THENL [ + FIRST_X_ASSUM MP_TAC THEN + CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + ASM_REWRITE_TAC + [ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTSQR_P256_INTERM1_EXEC;BIGNUM_MONTSQR_P256_NEON_EXEC; + BIGNUM_MONTSQR_P256_EXEC;GSYM CONJ_ASSOC] THEN + STRIP_TAC THEN + ASM_REWRITE_TAC[] THEN + FIND_HOLE_TAC; + + ALL_TAC + ] THEN + STRIP_TAC THEN + + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTSQR_P256_EQUIV1 th))) THEN + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTSQR_P256_EQUIV2 th))) THEN + FIRST_X_ASSUM (fun c1 -> + FIRST_X_ASSUM (fun c2 -> + MP_TAC (REWRITE_RULE [] (MATCH_MP ENSURES2_CONJ2 (CONJ c1 c2))) + )) THEN + + (* break 'ALL nonoverlapping' in assumptions *) + RULE_ASSUM_TAC (REWRITE_RULE[ + ALLPAIRS;ALL; + BIGNUM_MONTSQR_P256_EXEC;BIGNUM_MONTSQR_P256_NEON_EXEC; + BIGNUM_MONTSQR_P256_INTERM1_EXEC; + NONOVERLAPPING_CLAUSES]) THEN + SPLIT_FIRST_CONJ_ASSUM_TAC THEN + + MATCH_MP_TAC ENSURES2_WEAKEN THEN + REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + REPEAT STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[TAUT `(p /\ q /\ r) /\ p /\ q /\ r' <=> p /\ q /\ r /\ r'`] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc3,LENGTH bignum_montsqr_p256_interm1_core_mc)) + bignum_montsqr_p256_interm1_core_mc + (write PC (word pc3) s')` THEN + REPEAT CONJ_TAC THEN (TRY ( + REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN + ASM_REWRITE_TAC[] THEN NO_TAC + )) THENL [ + REWRITE_TAC[aligned_bytes_loaded;bytes_loaded] THEN + RULE_ASSUM_TAC (REWRITE_RULE[aligned_bytes_loaded]) THEN + ASM_REWRITE_TAC[] THEN + MATCH_MP_TAC READ_OVER_WRITE_MEMORY_BYTELIST THEN + REWRITE_TAC[BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC] THEN + ARITH_TAC; + + UNDISCH_TAC `equiv_input_states (s,s') x z` THEN + REWRITE_TAC[equiv_input_states;C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES;BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + EXISTS_TAC `a:num` THEN + REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN ASM_REWRITE_TAC[]; + REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN ASM_REWRITE_TAC[]; + EXPAND_RHS_TAC THEN READ_OVER_WRITE_ORTHOGONAL_TAC; + ]; + + UNDISCH_TAC `equiv_input_states (s,s') x z` THEN + REWRITE_TAC[equiv_input_states;C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES;BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + EXISTS_TAC `a:num` THEN + REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN ASM_REWRITE_TAC[]; + REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN ASM_REWRITE_TAC[]; + EXPAND_RHS_TAC THEN READ_OVER_WRITE_ORTHOGONAL_TAC; + ] + ]; + + REPEAT GEN_TAC THEN STRIP_TAC THEN + ASM_REWRITE_TAC[] THEN ASM_MESON_TAC[equiv_output_states_TRANS]; + + SUBSUMED_MAYCHANGE_TAC + ]);; + + + + +let event_n_at_pc_goal = mk_eventually_n_at_pc_statement + `nonoverlapping (word pc:int64,LENGTH bignum_montsqr_p256_mc) (z:int64,8 * 4)` + [`z:int64`;`x:int64`] (*pc_mc_ofs*)0 bignum_montsqr_p256_core_mc (*pc_ofs*)0 + `\s0. C_ARGUMENTS [z;x] s0`;; + +let BIGNUM_MONTSQR_P256_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, + + REWRITE_TAC[LENGTH_APPEND;BIGNUM_MONTSQR_P256_CORE_EXEC;BIGNUM_MONTSQR_P256_EXEC; + BARRIER_INST_BYTES_LENGTH] THEN + REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN + SUBGOAL_THEN `4 divides (LENGTH bignum_montsqr_p256_core_mc)` + (fun th -> REWRITE_TAC[MATCH_MP aligned_bytes_loaded_append th; + BIGNUM_MONTSQR_P256_CORE_EXEC]) THENL [ + REWRITE_TAC[BIGNUM_MONTSQR_P256_CORE_EXEC] THEN CONV_TAC NUM_DIVIDES_CONV; + ALL_TAC] THEN + REPEAT_N 3 GEN_TAC THEN + STRIP_TAC THEN + (* now start..! *) + X_GEN_TAC `s0:armstate` THEN GEN_TAC THEN STRIP_TAC THEN + (* eventually ==> eventually_n *) + PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC BIGNUM_MONTSQR_P256_CORE_EXEC);; + + +(****************************************************************************** + Inducing BIGNUM_MONTSQR_P256_NEON_SUBROUTINE_CORRECT + from BIGNUM_MONTSQR_P256_CORE_CORRECT +******************************************************************************) + +let BIGNUM_MONTSQR_P256_CORE_CORRECT_N = + prove_correct_n + BIGNUM_MONTSQR_P256_EXEC + BIGNUM_MONTSQR_P256_CORE_EXEC + BIGNUM_MONTSQR_P256_CORE_CORRECT + BIGNUM_MONTSQR_P256_EVENTUALLY_N_AT_PC;; + + +let BIGNUM_MONTSQR_P256_NEON_CORE_CORRECT = prove( + `!z x a pc2. + nonoverlapping (word pc2,LENGTH bignum_montsqr_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc2) bignum_montsqr_p256_neon_core_mc /\ + read PC s = word pc2 /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,4) s = a) + (\s. read PC s = word (pc2 + 544) /\ + (a EXP 2 <= 2 EXP 256 * p_256 + ==> bignum_from_memory (z,4) s = + (inverse_mod p_256 (2 EXP 256) * a EXP 2) MOD p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + + REPEAT GEN_TAC THEN + (* Prepare pc for the original program. *) + SUBGOAL_THEN + `?pc. + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_mc) (z:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_mc) (x:int64,8 * 4) /\ + 4 divides val (word pc:int64)` MP_TAC THENL [ + REWRITE_TAC[BIGNUM_MONTSQR_P256_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN + CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + FIND_HOLE_TAC; + + (** SUBGOAL 2 **) + ALL_TAC + ] THEN + + REPEAT_N 2 STRIP_TAC THEN + + VCGEN_EQUIV_TAC BIGNUM_MONTSQR_P256_EQUIV BIGNUM_MONTSQR_P256_CORE_CORRECT_N + [BIGNUM_MONTSQR_P256_EXEC;NONOVERLAPPING_CLAUSES] THEN + + (* unfold definitions that may block tactics *) + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTSQR_P256_EXEC; + BIGNUM_MONTSQR_P256_NEON_EXEC]) THEN + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Precond **) + X_GEN_TAC `s2:armstate` THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `4 divides val (word pc2:int64)` ASSUME_TAC THENL + [ FIRST_ASSUM (fun th -> + MP_TAC th THEN REWRITE_TAC[DIVIDES_4_VAL_WORD_64;aligned_bytes_loaded_word] + THEN METIS_TAC[]) THEN NO_TAC; ALL_TAC ] THEN + ASM_REWRITE_TAC[equiv_input_states] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc,LENGTH (APPEND bignum_montsqr_p256_core_mc barrier_inst_bytes))) + (APPEND bignum_montsqr_p256_core_mc barrier_inst_bytes) + (write PC (word pc) s2)` THEN + (* Expand variables appearing in the equiv relation *) + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC) THEN + (* Now has only one subgoal: the equivalence! *) + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + MAP_EVERY EXISTS_TAC [`a:num`] THEN + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC); + + (** SUBGOAL 2. Postcond **) + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; + + (** SUBGOAL 3. Frame **) + MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] + ]);; + +let BIGNUM_MONTSQR_P256_NEON_CORRECT = time prove + (`!z x a pc. + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_neon_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,4) s = a) + (\s. read PC s = word (pc + 544) /\ + (a EXP 2 <= 2 EXP 256 * p_256 + ==> bignum_from_memory (z,4) s = + (inverse_mod p_256 (2 EXP 256) * a EXP 2) MOD p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + REPEAT STRIP_TAC THEN + FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_MONTSQR_P256_NEON_CORE_CORRECT th)) THEN + REWRITE_TAC[ensures] THEN + DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN + EXISTS_TAC `x:int64` THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_montsqr_p256_neon_core_mc_def;BIGNUM_MONTSQR_P256_NEON_EXEC; + WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN + CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN + ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN + ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + +let BIGNUM_MONTSQR_P256_NEON_SUBROUTINE_CORRECT = time prove + (`!z x a pc returnaddress. + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_neon_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,4) s = a) + (\s. read PC s = returnaddress /\ + (a EXP 2 <= 2 EXP 256 * p_256 + ==> bignum_from_memory (z,4) s = + (inverse_mod p_256 (2 EXP 256) * a EXP 2) MOD p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + REWRITE_TAC[BIGNUM_MONTSQR_P256_NEON_EXEC] THEN + ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTSQR_P256_NEON_EXEC + (REWRITE_RULE[BIGNUM_MONTSQR_P256_NEON_EXEC] BIGNUM_MONTSQR_P256_NEON_CORRECT));; + + +(****************************************************************************** + Inducing BIGNUM_AMONTSQR_P256_NEON_SUBROUTINE_CORRECT + from BIGNUM_AMONTSQR_P256_CORE_CORRECT +******************************************************************************) + +let BIGNUM_AMONTSQR_P256_CORE_CORRECT_N = + prove_correct_n + BIGNUM_MONTSQR_P256_EXEC + BIGNUM_MONTSQR_P256_CORE_EXEC + BIGNUM_AMONTSQR_P256_CORE_CORRECT + BIGNUM_MONTSQR_P256_EVENTUALLY_N_AT_PC;; + +let BIGNUM_AMONTSQR_P256_NEON_CORE_CORRECT = prove( + `!z x a pc2. + nonoverlapping (word pc2,LENGTH bignum_montsqr_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc2) bignum_montsqr_p256_neon_core_mc /\ + read PC s = word pc2 /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,4) s = a) + (\s. read PC s = word (pc2 + 544) /\ + (bignum_from_memory (z,4) s == + inverse_mod p_256 (2 EXP 256) * a EXP 2) (mod p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + + REPEAT GEN_TAC THEN + (* Prepare pc for the original program. *) + SUBGOAL_THEN + `?pc. + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_mc) (z:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_mc) (x:int64,8 * 4) /\ + 4 divides val (word pc:int64)` MP_TAC THENL [ + REWRITE_TAC[BIGNUM_MONTSQR_P256_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN + CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + FIND_HOLE_TAC; + + (** SUBGOAL 2 **) + ALL_TAC + ] THEN + + REPEAT_N 2 STRIP_TAC THEN + + VCGEN_EQUIV_TAC BIGNUM_MONTSQR_P256_EQUIV BIGNUM_AMONTSQR_P256_CORE_CORRECT_N + [BIGNUM_MONTSQR_P256_EXEC;NONOVERLAPPING_CLAUSES] THEN + + (* unfold definitions that may block tactics *) + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTSQR_P256_EXEC; + BIGNUM_MONTSQR_P256_NEON_EXEC]) THEN + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Precond **) + X_GEN_TAC `s2:armstate` THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `4 divides val (word pc2:int64)` ASSUME_TAC THENL + [ FIRST_ASSUM (fun th -> + MP_TAC th THEN REWRITE_TAC[DIVIDES_4_VAL_WORD_64;aligned_bytes_loaded_word] + THEN METIS_TAC[]) THEN NO_TAC; ALL_TAC ] THEN + ASM_REWRITE_TAC[equiv_input_states] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc,LENGTH (APPEND bignum_montsqr_p256_core_mc barrier_inst_bytes))) + (APPEND bignum_montsqr_p256_core_mc barrier_inst_bytes) + (write PC (word pc) s2)` THEN + (* Expand variables appearing in the equiv relation *) + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC) THEN + (* Now has only one subgoal: the equivalence! *) + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + MAP_EVERY EXISTS_TAC [`a:num`] THEN + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC); + + (** SUBGOAL 2. Postcond **) + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; + + (** SUBGOAL 3. Frame **) + MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] + ]);; + +let BIGNUM_AMONTSQR_P256_NEON_CORRECT = time prove + (`!z x a pc. + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_neon_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,4) s = a) + (\s. read PC s = word (pc + 544) /\ + (bignum_from_memory (z,4) s == + inverse_mod p_256 (2 EXP 256) * a EXP 2) (mod p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + REPEAT STRIP_TAC THEN + FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_AMONTSQR_P256_NEON_CORE_CORRECT th)) THEN + REWRITE_TAC[ensures] THEN + DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN + EXISTS_TAC `x:int64` THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_montsqr_p256_neon_core_mc_def;BIGNUM_MONTSQR_P256_NEON_EXEC; + WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN + CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN + ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN + ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + +let BIGNUM_AMONTSQR_P256_NEON_SUBROUTINE_CORRECT = time prove + (`!z x a pc returnaddress. + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_neon_mc) (z,8 * 4) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_neon_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,4) s = a) + (\s. read PC s = returnaddress /\ + (bignum_from_memory (z,4) s == + inverse_mod p_256 (2 EXP 256) * a EXP 2) (mod p_256)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 4)])`, + REWRITE_TAC[BIGNUM_MONTSQR_P256_NEON_EXEC] THEN + ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTSQR_P256_NEON_EXEC + (REWRITE_RULE[BIGNUM_MONTSQR_P256_NEON_EXEC] BIGNUM_AMONTSQR_P256_NEON_CORRECT));; + diff --git a/arm/proofs/bignum_mul_8_16.ml b/arm/proofs/bignum_mul_8_16.ml index 055c2b29..0e0f95c2 100644 --- a/arm/proofs/bignum_mul_8_16.ml +++ b/arm/proofs/bignum_mul_8_16.ml @@ -481,18 +481,13 @@ let bignum_mul_8_16_mc = define_assert_from_elf "bignum_mul_8_16_mc" "arm/fastmu let BIGNUM_MUL_8_16_EXEC = ARM_MK_EXEC_RULE bignum_mul_8_16_mc;; (* bignum_mul_8_16 without callee-save register spilling. *) -let bignum_mul_8_16_core_mc_def = define - `bignum_mul_8_16_core_mc = - SUB_LIST (12, LENGTH bignum_mul_8_16_mc - 28) bignum_mul_8_16_mc`;; - -let bignum_mul_8_16_core_mc = - CONV_RULE (REWRITE_CONV [BIGNUM_MUL_8_16_EXEC] THENC - ONCE_DEPTH_CONV NUM_SUB_CONV THENC - REWRITE_CONV [bignum_mul_8_16_mc] THENC - RAND_CONV SUB_LIST_CONV) - bignum_mul_8_16_core_mc_def;; - -let BIGNUM_MUL_8_16_CORE_EXEC = ARM_MK_EXEC_RULE bignum_mul_8_16_core_mc;; +let bignum_mul_8_16_core_mc_def, + bignum_mul_8_16_core_mc, + BIGNUM_MUL_8_16_CORE_EXEC = + mk_sublist_of_mc "bignum_mul_8_16_core_mc" + bignum_mul_8_16_mc + (`12`,`LENGTH bignum_mul_8_16_mc - 28`) + BIGNUM_MUL_8_16_EXEC;; (* ------------------------------------------------------------------------- *) (* Lemmas to halve the number of case splits, useful for efficiency. *) diff --git a/arm/proofs/bignum_mul_8_16_neon.ml b/arm/proofs/bignum_mul_8_16_neon.ml index ae6143c1..ac0f57c5 100644 --- a/arm/proofs/bignum_mul_8_16_neon.ml +++ b/arm/proofs/bignum_mul_8_16_neon.ml @@ -520,6 +520,15 @@ let bignum_mul_8_16_neon_mc = define_assert_from_elf "bignum_mul_8_16_neon_mc" " let BIGNUM_MUL_8_16_NEON_EXEC = ARM_MK_EXEC_RULE bignum_mul_8_16_neon_mc;; +(* bignum_mul_8_16_neon_mc without callee-save register spilling. *) +let bignum_mul_8_16_neon_core_mc_def, + bignum_mul_8_16_neon_core_mc, + BIGNUM_MUL_8_16_NEON_CORE_EXEC = + mk_sublist_of_mc "bignum_mul_8_16_neon_core_mc" + bignum_mul_8_16_neon_mc + (`12`,`LENGTH bignum_mul_8_16_neon_mc - 28`) + BIGNUM_MUL_8_16_NEON_EXEC;; + (** Define equivalence relations at the begin and end of the two programs (after stack push/pops stripped) **) @@ -550,11 +559,26 @@ let bignum_mul_8_16_equiv_output_states = new_definition bignum_from_memory (z,16) s1 = c /\ bignum_from_memory (z,16) s1' = c)`;; -let must_be_smarter_tactic:tactic = - let eqeq = METIS[] `!x1 x2 y1 y2 a. (x1+x2=a /\ y1+y2=a) ==> x1+x2=y1+y2` in - FIRST_X_ASSUM (fun th1 -> - FIRST_X_ASSUM (fun th2 -> - PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM (MATCH_MP eqeq (CONJ th1 th2))));; +let equiv_goal = mk_equiv_statement + `ALL (nonoverlapping (z,8 * 16)) [ + (word pc:int64,LENGTH bignum_mul_8_16_mc); + (word pc2:int64,LENGTH bignum_mul_8_16_neon_mc); + (x,8 * 8); (y,8 * 8)]` + bignum_mul_8_16_equiv_input_states + bignum_mul_8_16_equiv_output_states + bignum_mul_8_16_core_mc 12 + `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; + X9; X10; X11; X12; X13; X14; X15; X16; + X17; X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 16)] ,, + MAYCHANGE SOME_FLAGS` + bignum_mul_8_16_neon_core_mc 12 + `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; + X9; X10; X11; X12; X13; X14; X15; X16; + X17; X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [Q0; Q1; Q2; Q3; Q4; Q5] ,, + MAYCHANGE [memory :> bytes(z,8 * 16)] ,, + MAYCHANGE SOME_FLAGS`;; let _org_extra_word_CONV = !extra_word_CONV;; extra_word_CONV := @@ -583,65 +607,32 @@ let actions = [ ];; let BIGNUM_MUL_8_16_EQUIV = prove( - `!pc pc2 z x y. - ALL (nonoverlapping (z,8 * 16)) [ - (word pc,LENGTH bignum_mul_8_16_mc); (word pc2,LENGTH bignum_mul_8_16_neon_mc); - (x,8 * 8); (y,8 * 8)] - ==> ensures2 arm - (\(s,s2). aligned_bytes_loaded s (word (pc + 0x0c)) bignum_mul_8_16_core_mc /\ - read PC s = word (pc + 0x0c) /\ - aligned_bytes_loaded s2 (word pc2) bignum_mul_8_16_neon_mc /\ - read PC s2 = word (pc2 + 0x0c) /\ - bignum_mul_8_16_equiv_input_states (s,s2) x y z) - (\(s,s2). aligned_bytes_loaded s (word (pc + 0x0c)) bignum_mul_8_16_core_mc /\ - read PC s = word (pc + 4*(461-1)) /\ - aligned_bytes_loaded s2 (word pc2) bignum_mul_8_16_neon_mc /\ - read PC s2 = word (pc2 + 4*(497-1)) /\ - bignum_mul_8_16_equiv_output_states (s,s2) x y z) - (\(s,s2) (s',s2'). - ((MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; - X9; X10; X11; X12; X13; X14; X15; X16; - X17; X19; X20; X21; X22; X23; X24] ,, - MAYCHANGE [memory :> bytes(z,8 * 16)] ,, - MAYCHANGE SOME_FLAGS) s s') /\ - ((MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; - X9; X10; X11; X12; X13; X14; X15; X16; - X17; X19; X20; X21; X22; X23; X24] ,, - MAYCHANGE [Q0; Q1; Q2; Q3; Q4; Q5] ,, - MAYCHANGE [memory :> bytes(z,8 * 16)] ,, - MAYCHANGE SOME_FLAGS) s2 s2')) - (\s. (4*(461-1) - 0x0c) DIV 4) (\s2. (4*(497-1) - 0x0c) DIV 4)`, + equiv_goal, + REWRITE_TAC[SOME_FLAGS;ALL;NONOVERLAPPING_CLAUSES;BIGNUM_MUL_8_16_EXEC; BIGNUM_MUL_8_16_NEON_EXEC;BIGNUM_MUL_8_16_CORE_EXEC] THEN REPEAT STRIP_TAC THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN (** Initialize **) EQUIV_INITIATE_TAC bignum_mul_8_16_equiv_input_states THEN - REPEAT (FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN - (* necessary to run ldr qs. TODO: automatize this process *) - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 x) s0'` `a'_1:int64` `a'_0:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add x (word 16))) s0'` `a'_3:int64` `a'_2:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add x (word 32))) s0'` `a'_5:int64` `a'_4:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add x (word 48))) s0'` `a'_7:int64` `a'_6:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 y) s0'` `b'_1:int64` `b'_0:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add y (word 16))) s0'` `b'_3:int64` `b'_2:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add y (word 32))) s0'` `b'_5:int64` `b'_4:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add y (word 48))) s0'` `b'_7:int64` `b'_6:int64` THEN - REPEAT_N 2 must_be_smarter_tactic THEN + REPEAT(FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN + (* necessary to run ldr qs. *) + COMBINE_READ_BYTES64_PAIRS_TAC THEN (* Abbreviate pc to help tactics. *) + ASSERT_NONOVERLAPPING_MODULO_TAC + `nonoverlapping_modulo (2 EXP 64) + (val (z:int64),8 * 16) (pc + 12,LENGTH bignum_mul_8_16_core_mc)` + BIGNUM_MUL_8_16_CORE_EXEC THEN + ASSERT_NONOVERLAPPING_MODULO_TAC + `nonoverlapping_modulo (2 EXP 64) + (val (z:int64),8 * 16) (pc2 + 12,LENGTH bignum_mul_8_16_neon_core_mc)` + BIGNUM_MUL_8_16_NEON_CORE_EXEC THEN ABBREV_TAC `pc' = pc + 12` THEN - SUBGOAL_THEN `nonoverlapping_modulo (2 EXP 64) - (val (z:int64),8 * 16) (pc',LENGTH bignum_mul_8_16_core_mc)` - (fun th -> ASSUME_TAC (REWRITE_RULE[BIGNUM_MUL_8_16_CORE_EXEC] th)) THENL [ - REWRITE_TAC[BIGNUM_MUL_8_16_CORE_EXEC] THEN EXPAND_TAC "pc'" THEN - NONOVERLAPPING_TAC; - ALL_TAC] THEN + ABBREV_TAC `pc2' = pc2 + 12` THEN (* The main simulation part *) - MAP_EVERY - (fun action -> - EQUIV_STEP_TAC action BIGNUM_MUL_8_16_CORE_EXEC BIGNUM_MUL_8_16_NEON_EXEC) - actions THEN + EQUIV_STEPS_TAC actions + BIGNUM_MUL_8_16_CORE_EXEC BIGNUM_MUL_8_16_NEON_CORE_EXEC THEN (* We finished simulation of the programs. Prove postconditions *) ENSURES_FINAL_STATE'_TAC THEN ENSURES_FINAL_STATE'_TAC THEN @@ -650,18 +641,21 @@ let BIGNUM_MUL_8_16_EQUIV = prove( (** SUBGOAL 0. PC **) EXPAND_TAC "pc'" THEN CONV_TAC WORD_RULE; - (** SUBGOAL 1. Outputs **) + (** SUBGOAL 1. PC2 **) + EXPAND_TAC "pc2'" THEN CONV_TAC WORD_RULE; + + (** SUBGOAL 2. Outputs **) ASM_REWRITE_TAC[bignum_mul_8_16_equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,8) state`; C_ARGUMENTS] THEN REPEAT (HINT_EXISTS_REFL_TAC THEN ASM_REWRITE_TAC[]) THEN ASM_REWRITE_TAC[BIGNUM_EXPAND_CONV `bignum_from_memory (p,16) st`]; - (** SUBGOAL 2. MAYCHANGE left **) + (** SUBGOAL 3. MAYCHANGE left **) DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0':armstate` (concl th)) THEN MONOTONE_MAYCHANGE_TAC; - (** SUBGOAL 3. MAYCHANGE right **) + (** SUBGOAL 4. MAYCHANGE right **) DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0:armstate` (concl th)) THEN MONOTONE_MAYCHANGE_TAC ]);; @@ -675,33 +669,24 @@ extra_word_CONV := _org_extra_word_CONV;; BIGNUM_MUL_8_16_CORE_CORRECT that has # instructions to step. Proving the updated version consists of two steps: - (1) An updated 'ensures' that has a 'barrier' attached at the end of - the core + + (1) A proof stating that, after a specified amount of steps, + a postcondition will be satisfied at the end of bignum_mul_8_16_core_mc + thanks to the explicit description of PC. **) -let BIGNUM_MUL_8_16_CORE_WITH_BARRIER_CORRECT = - prove_correct_barrier_appended BIGNUM_MUL_8_16_CORE_CORRECT - `bignum_mul_8_16_core_mc` BIGNUM_MUL_8_16_CORE_EXEC;; +let event_n_at_pc_goal = mk_eventually_n_at_pc_statement + `ALL (nonoverlapping (z,8 * 16)) [ + (word pc:int64,LENGTH (bignum_mul_8_16_mc)); + (x:int64,8 * 8); (y:int64,8 * 8)]` + [`z:int64`;`x:int64`;`y:int64`] 12 bignum_mul_8_16_core_mc 12 + `(\s0. C_ARGUMENTS [z;x;y] s0)`;; -(** - (2) A proof stating that, after a specified amount of steps, - any postcondition will be satisfied at the end of the program. -**) +let BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, -let BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC = prove( - `!pc z x y. - ALL (nonoverlapping (z,8 * 16)) [ - (word pc,LENGTH (bignum_mul_8_16_mc)); (x,8 * 8); (y,8 * 8)] - ==> eventually_n_at_pc - (pc+0x0c) (APPEND bignum_mul_8_16_core_mc barrier_inst_bytes) - (pc+0x0c) (pc+0x0c+LENGTH bignum_mul_8_16_core_mc) - ((LENGTH bignum_mul_8_16_core_mc) DIV 4) - (\s0. C_ARGUMENTS [z;x;y] s0)`, - (* Converted by thenify.py *) REWRITE_TAC[LENGTH_APPEND;BIGNUM_MUL_8_16_CORE_EXEC;BIGNUM_MUL_8_16_EXEC; BARRIER_INST_BYTES_LENGTH] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_DIVIDES_CONV) THEN - REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN + REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN SUBGOAL_THEN `4 divides (LENGTH bignum_mul_8_16_core_mc)` (fun th -> REWRITE_TAC[MATCH_MP aligned_bytes_loaded_append th; BIGNUM_MUL_8_16_CORE_EXEC]) THENL [ @@ -711,17 +696,11 @@ let BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC = prove( (* nonoverlapping *) STRIP_TAC THEN (* Abbreviate pc+12 as pc' because EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC likes pc without offsets *) + ASSERT_NONOVERLAPPING_MODULO_TAC + `nonoverlapping_modulo (2 EXP 64) (val (z:int64), 128) + (pc+12, LENGTH bignum_mul_8_16_mc - 12)` + BIGNUM_MUL_8_16_EXEC THEN ABBREV_TAC `pc'=pc+12` THEN - SUBGOAL_THEN - `nonoverlapping_modulo (2 EXP 64) (val (z:int64), 128) - (pc', LENGTH bignum_mul_8_16_mc - 12)` - MP_TAC THENL [ - REWRITE_TAC[BIGNUM_MUL_8_16_EXEC] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_SUB_CONV) THEN - EXPAND_TAC "pc'" THEN NONOVERLAPPING_TAC; ALL_TAC] THEN - DISCH_THEN (fun th -> - let th = REWRITE_RULE[BIGNUM_MUL_8_16_EXEC] th in - ASSUME_TAC(CONV_RULE (ONCE_DEPTH_CONV NUM_SUB_CONV) th)) THEN SUBGOAL_THEN `pc+0x0c+LENGTH bignum_mul_8_16_core_mc = pc'+LENGTH bignum_mul_8_16_core_mc` MP_TAC THENL [ @@ -734,62 +713,23 @@ let BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC = prove( (* now start..! *) X_GEN_TAC `s0:armstate` THEN GEN_TAC THEN STRIP_TAC THEN - DISCH_THEN (LABEL_TAC "HEVENTUALLY") THEN - REWRITE_TAC[eventually_n] THEN - CONJ_TAC THENL [ - (** SUBGOAL 1 **) - (* `!s'. steps arm .. s s' ==> P s s'` *) - X_GEN_TAC `s_final:armstate` THEN UNDISCH_LABEL_TAC "HEVENTUALLY" THEN - REPEAT_I_N 0 457 - (fun i -> EVENTUALLY_TAKE_STEP_RIGHT_FORALL_TAC - BIGNUM_MUL_8_16_CORE_EXEC `s0:armstate` 0 i 457 THEN - DISCARD_OLDSTATE_TAC ("s" ^ (if i = 456 then "_final" else string_of_int (i+1)))) THEN - (* match last step: utilize the barrier instruction *) - ONCE_REWRITE_TAC[eventually_CASES] THEN - ASM_REWRITE_TAC[] THEN - STRIP_TAC THEN - (let stuck_th = (REWRITE_RULE[TAUT `(P/\Q/\R==>S) <=> (P==>Q==>R==>S)`] - ALIGNED_BYTES_LOADED_BARRIER_ARM_STUCK) in - FIRST_ASSUM (fun th -> - (* th is 'aligned_bytes_loaded s_final (word_add (word pc') (word 1828)) barrier_inst_bytes' *) - let th = REWRITE_RULE [WORD_RULE `word_add (word x) (word y):int64 = word (x+y)`] th in - let th2 = MATCH_MP stuck_th th in - ASM_MESON_TAC[th2])); - - (** SUBGOAL 2 **) - ONCE_REWRITE_TAC[ - ITAUT`(!x y. P y /\ Q x y ==> R x y) <=> (!y. P y ==> !x. Q x y ==> R x y)`] THEN - X_GEN_TAC `n0:num` THEN STRIP_TAC THEN GEN_TAC THEN - REPEAT_I_N 0 457 (fun i -> - let n = mk_var("n" ^ (string_of_int i),`:num`) in - let np1 = mk_var("n" ^ (string_of_int (i+1)),`:num`) in - DISJ_CASES_THEN2 - SUBST1_TAC - (X_CHOOSE_THEN np1 - (fun th -> SUBST_ALL_TAC (REWRITE_RULE[ARITH_RULE`SUC x=1+x`] th))) - (SPEC n num_CASES) THENL [ - (** SUBGOAL 1 **) - SIMPLIFY_STEPS_0_TAC THEN - SOLVE_EXISTS_ARM_TAC (snd (CONJ_PAIR BIGNUM_MUL_8_16_CORE_EXEC)); - - (** SUBGOAL 2 **) - EVENTUALLY_STEPS_EXISTS_STEP_TAC BIGNUM_MUL_8_16_CORE_EXEC i (4*(i+1)) THEN - FIRST_X_ASSUM (fun th -> - let res = MATCH_MP (ARITH_RULE`!x. 1+x x<(n-1)`) th in - ASSUME_TAC (CONV_RULE (ONCE_DEPTH_CONV NUM_REDUCE_CONV) res)) - ]) THEN - ASM_ARITH_TAC (* last is: 'n < 0' *) - ]);; + (* eventually ==> eventually_n *) + PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC BIGNUM_MUL_8_16_CORE_EXEC);; -(** By combining BIGNUM_MUL_8_16_CORE_WITH_BARRIER_CORRECT and - BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC, now we can prove - ensures_n version of BIGNUM_MUL_8_16_CORE_CORRECT +(** + (2) An updated 'ensures_n' that has an imaginary 'barrier' instruction + attached at the end of the core, and is given 'n' which is the + number of small steps needed to reach at its postcondition. + + This needs BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC. **) + let BIGNUM_MUL_8_16_CORE_CORRECT_N = - prove_correct_n [BIGNUM_MUL_8_16_EXEC;BIGNUM_MUL_8_16_CORE_EXEC] - BIGNUM_MUL_8_16_CORE_WITH_BARRIER_CORRECT - BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC - `\(s:armstate). (0x730 - 0xc) DIV 4`;; + prove_correct_n + BIGNUM_MUL_8_16_EXEC + BIGNUM_MUL_8_16_CORE_EXEC + BIGNUM_MUL_8_16_CORE_CORRECT + BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC;; (** Finally, we prove the correctness of core of bignum_mul_8_16_neon @@ -825,102 +765,52 @@ let BIGNUM_MUL_8_16_NEON_CORRECT = prove( ALL (nonoverlapping (word pc,LENGTH bignum_mul_8_16_mc)) [(x:int64,8 * 8); (y:int64,8 * 8)] /\ 4 divides val (word pc:int64)` MP_TAC THENL [ - (** SUBGOAL 1 **) - REWRITE_TAC[LENGTH_APPEND;BIGNUM_MUL_8_16_EXEC;BARRIER_INST_BYTES_LENGTH; - NONOVERLAPPING_CLAUSES;ALL] THEN - CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN - MAP_EVERY (fun t -> ASSUME_TAC (SPEC t VAL_BOUND_64)) [`x:int64`;`y:int64`;`z:int64`] THEN - MAP_EVERY (fun (v:term) -> - ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num 1984))) THEN - ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num (2*1984)))) THEN - ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num (3*1984))))) - [`val (x:int64)`;`val (y:int64)`;`val (z:int64)`] THEN - RULE_ASSUM_TAC (REWRITE_RULE [ARITH_RULE`!x k. ~(x < k) <=> k <= x`]) THEN - TRY ASM_ARITH_TAC (* Remove invalid layouts *) THEN - TRY_CONST_PC_TAC (mk_numeral (num (3 * 1984 + 128))) THEN - TRY_CONST_PC_TAC (mk_numeral (num (2 * 1984 + 128))) THEN - TRY_CONST_PC_TAC (mk_numeral (num (1 * 1984 + 128))) THEN - TRY_CONST_PC_TAC (mk_numeral (num (0 * 1984 + 128))); - - (** SUBGOAL 2 **) + REWRITE_TAC[BIGNUM_MUL_8_16_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN + CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + FIND_HOLE_TAC; + ALL_TAC ] THEN (* massage nonoverlapping assumptions *) - REWRITE_TAC (ALL::mc_lengths_th) THEN REPEAT_N 2 STRIP_TAC THEN - (* Start reasoning *) - MATCH_MP_TAC ENSURES_N_ENSURES THEN - (* TODO: infer this automatically *) - EXISTS_TAC `\(s:armstate). (4*(497-1) - 0x0c) DIV 4` THEN - (* Prepare a 'conjunction' of SPEC and EQUIV *) - MP_TAC ( - let ensures_n_part = - SPECL [`z:int64`;`x:int64`;`y:int64`;`a:num`;`b:num`;`pc:num`] - BIGNUM_MUL_8_16_CORE_CORRECT_N in - let equiv_part = - SPECL [`pc:num`;`pc2:num`;`z:int64`;`x:int64`;`y:int64`] BIGNUM_MUL_8_16_EQUIV in - let conj_ensures_n_equiv = CONJ ensures_n_part equiv_part in - MATCH_MP (TAUT`((P==>Q)/\(R==>S)) ==> ((P/\R)==>(Q/\S))`) conj_ensures_n_equiv) THEN - (* Erase 'assumptions' in the spec's nonoverlapping *) - ASM_REWRITE_TAC (ALL::mc_lengths_th) THEN - (* Reduce all number expressions (actually, wanted to target numsteps functions but it is hard *) - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - (* Conjunction of ensures2 and ensures_n *) - DISCH_THEN (fun th -> LABEL_TAC "H" (REWRITE_RULE[] (MATCH_MP ENSURES_N_ENSURES2_CONJ th))) THEN - (* .. and apply H as a precondition of ENSURES2_ENSURES_N *) - REMOVE_THEN "H" (fun th -> - let th2 = MATCH_MP - (REWRITE_RULE [TAUT `(P/\P2/\P3==>Q) <=> P==>P2==>P3==>Q`] ENSURES2_ENSURES_N) th in - MATCH_MP_TAC (REWRITE_RULE [TAUT`(P==>Q==>R) <=> (P/\Q==>R)`] th2)) THEN - REWRITE_TAC[] THEN + + VCGEN_EQUIV_TAC BIGNUM_MUL_8_16_EQUIV BIGNUM_MUL_8_16_CORE_CORRECT_N + [BIGNUM_MUL_8_16_EXEC; BIGNUM_MUL_8_16_NEON_EXEC] THEN + (* unravel definitions that may block reasonings *) - RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES]) THEN + RULE_ASSUM_TAC (REWRITE_RULE([ALL;NONOVERLAPPING_CLAUSES] @ mc_lengths_th)) THEN + REPEAT SPLIT_FIRST_CONJ_ASSUM_TAC THEN REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN REPEAT CONJ_TAC THENL [ (** SUBGOAL 1. Precond **) X_GEN_TAC `s2:armstate` THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `4 divides val (word pc2:int64)` ASSUME_TAC THENL + [ FIRST_ASSUM (fun th -> + MP_TAC th THEN REWRITE_TAC[DIVIDES_4_VAL_WORD_64;aligned_bytes_loaded_word] + THEN METIS_TAC[]) THEN NO_TAC; ALL_TAC ] THEN ASM_REWRITE_TAC[bignum_mul_8_16_equiv_input_states] THEN EXISTS_TAC `write (memory :> bytelist (word (pc+12),LENGTH (APPEND bignum_mul_8_16_core_mc barrier_inst_bytes))) (APPEND bignum_mul_8_16_core_mc barrier_inst_bytes) (write PC (word (pc+12)) s2)` THEN - ASM_REWRITE_TAC[aligned_bytes_loaded;bytes_loaded] THEN - SUBGOAL_THEN `4 divides val (word (pc+12):int64)` (fun th -> REWRITE_TAC[th]) THENL [ - (** SUBGOAL 1 **) - UNDISCH_TAC `4 divides val (word pc:int64)` THEN - REWRITE_TAC[VAL_WORD;DIMINDEX_64] THEN - (let divth = REWRITE_RULE[EQ_CLAUSES] (NUM_DIVIDES_CONV `4 divides 2 EXP 64`) in - REWRITE_TAC[GSYM (MATCH_MP DIVIDES_MOD2 divth)]) THEN - IMP_REWRITE_TAC[DIVIDES_ADD] THEN - STRIP_TAC THEN CONV_TAC NUM_DIVIDES_CONV; - - (** SUBGOAL 2 **) + SUBGOAL_THEN `aligned_bytes_loaded s2 (word (pc2 + 12):int64) bignum_mul_8_16_neon_core_mc` + (fun th -> REWRITE_TAC[th]) THENL [ + REWRITE_TAC[bignum_mul_8_16_neon_core_mc_def] THEN + IMP_REWRITE_TAC[WORD_ADD;ALIGNED_BYTES_LOADED_SUB_LIST] THEN + CONV_TAC NUM_DIVIDES_CONV THEN NO_TAC; + ALL_TAC + ] THEN + SUBGOAL_THEN `4 divides val (word (pc+12):int64)` ASSUME_TAC THENL [ + IMP_REWRITE_TAC[DIVIDES_4_VAL_WORD_ADD_64] THEN CONV_TAC NUM_DIVIDES_CONV; ALL_TAC ] THEN - (* Expand variables appearing in the equiv relation *) - REPEAT CONJ_TAC THEN - (* for register updates *) - (TRY (REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN REFL_TAC)) THEN - (* for register updates, with rhses abbreviated *) - (TRY (EXPAND_RHS_TAC THEN REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN REFL_TAC)) THEN - (* for memory updates *) - (TRY (EXPAND_RHS_TAC THEN - REWRITE_TAC[LENGTH_APPEND;BIGNUM_MUL_8_16_CORE_EXEC;BARRIER_INST_BYTES_LENGTH] THEN - READ_OVER_WRITE_ORTHOGONAL_TAC)) THEN - (TRY ((MATCH_MP_TAC READ_OVER_WRITE_MEMORY_APPEND_BYTELIST ORELSE - MATCH_MP_TAC READ_OVER_WRITE_MEMORY_BYTELIST) THEN - REWRITE_TAC[LENGTH_APPEND;BIGNUM_MUL_8_16_CORE_EXEC;BARRIER_INST_BYTES_LENGTH] THEN - ARITH_TAC)) THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MUL_8_16_CORE_EXEC THEN (* Now has only one subgoal: the equivalence! *) REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN MAP_EVERY EXISTS_TAC [`a:num`;`b:num`] THEN - REPEAT CONJ_TAC THEN - (TRY (EXPAND_RHS_TAC THEN REFL_TAC)) THEN - (TRY (EXPAND_RHS_TAC THEN - REWRITE_TAC[LENGTH_APPEND;BIGNUM_MUL_8_16_CORE_EXEC;BARRIER_INST_BYTES_LENGTH] THEN - READ_OVER_WRITE_ORTHOGONAL_TAC)); + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MUL_8_16_CORE_EXEC; (** SUBGOAL 2. Postcond **) MESON_TAC[bignum_mul_8_16_equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; diff --git a/arm/proofs/bignum_sqr_8_16.ml b/arm/proofs/bignum_sqr_8_16.ml index 26d98653..8ed1b280 100644 --- a/arm/proofs/bignum_sqr_8_16.ml +++ b/arm/proofs/bignum_sqr_8_16.ml @@ -312,18 +312,13 @@ let bignum_sqr_8_16_mc = define_assert_from_elf "bignum_sqr_8_16_mc" "arm/fastmu let BIGNUM_SQR_8_16_EXEC = ARM_MK_EXEC_RULE bignum_sqr_8_16_mc;; (* bignum_sqr_8_16 without callee-save register spilling. *) -let bignum_sqr_8_16_core_mc_def = define - `bignum_sqr_8_16_core_mc = - SUB_LIST (8, LENGTH bignum_sqr_8_16_mc - 20) bignum_sqr_8_16_mc`;; - -let bignum_sqr_8_16_core_mc = - CONV_RULE (REWRITE_CONV [BIGNUM_SQR_8_16_EXEC] THENC - ONCE_DEPTH_CONV NUM_SUB_CONV THENC - REWRITE_CONV [bignum_sqr_8_16_mc] THENC - RAND_CONV SUB_LIST_CONV) - bignum_sqr_8_16_core_mc_def;; - -let BIGNUM_SQR_8_16_CORE_EXEC = ARM_MK_EXEC_RULE bignum_sqr_8_16_core_mc;; +let bignum_sqr_8_16_core_mc_def, + bignum_sqr_8_16_core_mc, + BIGNUM_SQR_8_16_CORE_EXEC = + mk_sublist_of_mc "bignum_sqr_8_16_core_mc" + bignum_sqr_8_16_mc + (`8`,`LENGTH bignum_sqr_8_16_mc - 20`) + BIGNUM_SQR_8_16_EXEC;; (* ------------------------------------------------------------------------- *) (* Lemmas to halve the number of case splits, useful for efficiency. *) diff --git a/arm/proofs/bignum_sqr_8_16_neon.ml b/arm/proofs/bignum_sqr_8_16_neon.ml index c3638855..3ac71128 100644 --- a/arm/proofs/bignum_sqr_8_16_neon.ml +++ b/arm/proofs/bignum_sqr_8_16_neon.ml @@ -390,6 +390,15 @@ let bignum_sqr_8_16_neon_mc = define_assert_from_elf "bignum_sqr_8_16_neon_mc" let BIGNUM_SQR_8_16_NEON_EXEC = ARM_MK_EXEC_RULE bignum_sqr_8_16_neon_mc;; +(* bignum_sqr_8_16_neon_mc without ret. *) +let bignum_sqr_8_16_neon_core_mc_def, + bignum_sqr_8_16_neon_core_mc, + BIGNUM_SQR_8_16_NEON_CORE_EXEC = + mk_sublist_of_mc "bignum_sqr_8_16_neon_core_mc" + bignum_sqr_8_16_neon_mc + (`8`,`LENGTH bignum_sqr_8_16_neon_mc - 20`) + BIGNUM_SQR_8_16_NEON_EXEC;; + (** Equivalence relation at the begin and end of the two programs (after stack push/pops stripped **) @@ -569,121 +578,106 @@ let actions2 = [ ];; -let must_be_smarter_tactic:tactic = - let eqeq = METIS[] `!x1 x2 y1 y2 a. (x1+x2=a /\ y1+y2=a) ==> x1+x2=y1+y2` in - FIRST_X_ASSUM (fun th1 -> - FIRST_X_ASSUM (fun th2 -> - PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM (MATCH_MP eqeq (CONJ th1 th2))));; + +let equiv_goal = mk_equiv_statement + `ALL (nonoverlapping (z,8 * 16)) [ + (word pc:int64,LENGTH bignum_sqr_8_16_mc); + (word pc2:int64,LENGTH bignum_sqr_8_16_neon_mc); + (x,8 * 8)]` + bignum_sqr_8_16_equiv_input_states + bignum_sqr_8_16_equiv_output_states + bignum_sqr_8_16_core_mc 8 + `MAYCHANGE [PC; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17; X19; X20; X21; X22] ,, + MAYCHANGE [memory :> bytes(z,8 * 16)] ,, + MAYCHANGE SOME_FLAGS` + bignum_sqr_8_16_neon_core_mc 8 + `MAYCHANGE [PC; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17; X19; X20; X21; X22] ,, + MAYCHANGE [Q0; Q1; Q2; Q3; Q4; Q5; Q6; Q7; Q16; Q17; Q18; Q19; Q20; + Q21; Q22; Q23; Q30] ,, + MAYCHANGE [memory :> bytes(z,8 * 16)] ,, + MAYCHANGE SOME_FLAGS`;; let _org_extra_word_CONV = !extra_word_CONV;; extra_word_CONV := [GEN_REWRITE_CONV I [WORD_BITMANIP_SIMP_LEMMAS; WORD_SQR64_HI; WORD_SQR64_LO; WORD_MUL64_LO; WORD_MUL64_HI]] @ (!extra_word_CONV);; -let BIGNUM_SQR_8_16_EQUIV = prove( - `!pc pc2 z x. - ALL (nonoverlapping (z,8 * 16)) [ - (word pc,LENGTH bignum_sqr_8_16_mc); (word pc2,LENGTH bignum_sqr_8_16_neon_mc); - (x,8 * 8)] - ==> ensures2 arm - (\(s,s2). aligned_bytes_loaded s (word (pc + 0x8)) bignum_sqr_8_16_core_mc /\ - read PC s = word (pc + 0x8) /\ - aligned_bytes_loaded s2 (word pc2) bignum_sqr_8_16_neon_mc /\ - read PC s2 = word (pc2 + 0x8) /\ - bignum_sqr_8_16_equiv_input_states (s,s2) x z) - (\(s,s2). aligned_bytes_loaded s (word (pc + 0x08)) bignum_sqr_8_16_core_mc /\ - read PC s = word (pc + 4*(293-1)) /\ - aligned_bytes_loaded s2 (word pc2) bignum_sqr_8_16_neon_mc /\ - read PC s2 = word (pc2 + 4*(367-1)) /\ - bignum_sqr_8_16_equiv_output_states (s,s2) x z) - (\(s,s2) (s',s2'). - ((MAYCHANGE [PC; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; - X13; X14; X15; X16; X17; X19; X20; X21; X22] ,, - MAYCHANGE [memory :> bytes(z,8 * 16)] ,, - MAYCHANGE SOME_FLAGS) s s') /\ - ((MAYCHANGE [PC; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; - X13; X14; X15; X16; X17; X19; X20; X21; X22] ,, - MAYCHANGE [Q0; Q1; Q2; Q3; Q4; Q5; Q6; Q7; Q16; Q17; Q18; Q19; Q20; - Q21; Q22; Q23; Q30] ,, - MAYCHANGE [memory :> bytes(z,8 * 16)] ,, - MAYCHANGE SOME_FLAGS) s2 s2')) - (\s. (4*(293-1) - 0x8) DIV 4) (\s2. (4*(367-1) - 0x8) DIV 4)`, - (* Converted by thenify.py *) - REWRITE_TAC[SOME_FLAGS;ALL;NONOVERLAPPING_CLAUSES;BIGNUM_SQR_8_16_EXEC; - BIGNUM_SQR_8_16_NEON_EXEC;BIGNUM_SQR_8_16_CORE_EXEC] THEN - CONV_TAC (DEPTH_CONV (* avoid reducing EXP *) - (NUM_ADD_CONV ORELSEC NUM_SUB_CONV ORELSEC NUM_MULT_CONV ORELSEC NUM_DIV_CONV)) THEN +let BIGNUM_SQR_8_16_EQUIV = prove(equiv_goal, + + REWRITE_TAC[SOME_FLAGS;ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_SQR_8_16_EXEC;BIGNUM_SQR_8_16_NEON_EXEC; + BIGNUM_SQR_8_16_CORE_EXEC;BIGNUM_SQR_8_16_NEON_CORE_EXEC] THEN REPEAT STRIP_TAC THEN (** Initialize **) EQUIV_INITIATE_TAC bignum_sqr_8_16_equiv_input_states THEN REPEAT (FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN (* necessary to run ldr qs *) - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 x) s0'` `a'_1:int64` `a'_0:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add x (word 16))) s0'` `a'_3:int64` `a'_2:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add x (word 32))) s0'` `a'_5:int64` `a'_4:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add x (word 48))) s0'` `a'_7:int64` `a'_6:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 x) s0` `a_1:int64` `a_0:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add x (word 16))) s0` `a_3:int64` `a_2:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add x (word 32))) s0` `a_5:int64` `a_4:int64` THEN - BYTES128_EQ_JOIN64_TAC `read (memory :> bytes128 (word_add x (word 48))) s0` `a_7:int64` `a_6:int64` THEN - must_be_smarter_tactic THEN + COMBINE_READ_BYTES64_PAIRS_TAC THEN (* adding offset due to the size difference between .._mc and .._core_mc *) + ASSERT_NONOVERLAPPING_MODULO_TAC + `nonoverlapping_modulo (2 EXP 64) + (val (z:int64),8 * 16) (pc + 8,LENGTH bignum_sqr_8_16_core_mc)` + BIGNUM_SQR_8_16_CORE_EXEC THEN + ASSERT_NONOVERLAPPING_MODULO_TAC + `nonoverlapping_modulo (2 EXP 64) + (val (z:int64),8 * 16) (pc2 + 8,LENGTH bignum_sqr_8_16_neon_core_mc)` + BIGNUM_SQR_8_16_NEON_CORE_EXEC THEN ABBREV_TAC `pc' = pc + 8` THEN - SUBGOAL_THEN `nonoverlapping_modulo (2 EXP 64) - (val (z:int64),8 * 16) (pc',LENGTH bignum_sqr_8_16_core_mc)` - (fun th -> ASSUME_TAC (REWRITE_RULE[BIGNUM_SQR_8_16_CORE_EXEC] th)) THENL [ - REWRITE_TAC[BIGNUM_SQR_8_16_CORE_EXEC] THEN EXPAND_TAC "pc'" THEN - NONOVERLAPPING_TAC; - ALL_TAC] THEN + ABBREV_TAC `pc2' = pc2 + 8` THEN + (* Start *) - MAP_EVERY - (fun action -> - EQUIV_STEP_TAC action BIGNUM_SQR_8_16_CORE_EXEC BIGNUM_SQR_8_16_NEON_EXEC) - actions THEN + EQUIV_STEPS_TAC actions BIGNUM_SQR_8_16_CORE_EXEC BIGNUM_SQR_8_16_NEON_CORE_EXEC THEN + (* This is an unfortunate manual tweak because the word rule in neon_helper isn't + exactly applied. This mismatch happened when the assembly was written by hand. *) SUBGOAL_THEN `val (a'_6:int64) * val (a'_2:int64) = val a'_2 * val a'_6` (fun th -> RULE_ASSUM_TAC(REWRITE_RULE[th])) THENL [ARITH_TAC; ALL_TAC] THEN SUBGOAL_THEN `val (a'_7:int64) * val (a'_3:int64) = val a'_3 * val a'_7` (fun th -> RULE_ASSUM_TAC(REWRITE_RULE[th])) THENL [ARITH_TAC; ALL_TAC] THEN - MAP_EVERY - (fun action -> - EQUIV_STEP_TAC action BIGNUM_SQR_8_16_CORE_EXEC BIGNUM_SQR_8_16_NEON_EXEC) - actions2 THEN - ENSURES_FINAL_STATE'_TAC THEN ENSURES_FINAL_STATE'_TAC THEN + (* More steps *) + EQUIV_STEPS_TAC actions2 BIGNUM_SQR_8_16_CORE_EXEC BIGNUM_SQR_8_16_NEON_CORE_EXEC THEN + + (* Finalize *) + REPEAT_N 2 ENSURES_FINAL_STATE'_TAC THEN (* Prove remaining clauses from the postcondition *) ASM_REWRITE_TAC[] THEN REPEAT CONJ_TAC THENL [ (** SUBGOAL 0. PC **) EXPAND_TAC "pc'" THEN CONV_TAC WORD_RULE; - (** SUBGOAL 1. Outputs **) + (** SUBGOAL 1. PC2 **) + EXPAND_TAC "pc2'" THEN CONV_TAC WORD_RULE; + + (** SUBGOAL 2. Outputs **) ASM_REWRITE_TAC[bignum_sqr_8_16_equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,8) state`; C_ARGUMENTS] THEN REPEAT (HINT_EXISTS_REFL_TAC THEN ASM_REWRITE_TAC[]) THEN ASM_REWRITE_TAC[BIGNUM_EXPAND_CONV `bignum_from_memory (p,16) st`]; - (** SUBGOAL 2. MAYCHANGE left **) + (** SUBGOAL 3. MAYCHANGE left **) DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0':armstate` (concl th)) THEN MONOTONE_MAYCHANGE_TAC; - (** SUBGOAL 3. MAYCHANGE right **) + (** SUBGOAL 4. MAYCHANGE right **) DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0:armstate` (concl th)) THEN MONOTONE_MAYCHANGE_TAC ]);; extra_word_CONV := _org_extra_word_CONV;; -let BIGNUM_SQR_8_16_EVENTUALLY_N_AT_PC = prove( - `!pc z x. - nonoverlapping (word pc,LENGTH (bignum_sqr_8_16_mc)) (z,8 * 16) - ==> eventually_n_at_pc - (pc+0x8) (APPEND bignum_sqr_8_16_core_mc barrier_inst_bytes) - (pc+0x8) (pc+0x8+LENGTH bignum_sqr_8_16_core_mc) - ((LENGTH bignum_sqr_8_16_core_mc) DIV 4) - (\s0. C_ARGUMENTS [z;x] s0)`, + +let event_n_at_pc_goal = mk_eventually_n_at_pc_statement + `nonoverlapping (word pc:int64,LENGTH bignum_sqr_8_16_mc) (z:int64,8 * 16)` + [`z:int64`;`x:int64`] 8 bignum_sqr_8_16_core_mc 8 + `(\s0. C_ARGUMENTS [z;x] s0)`;; + +let BIGNUM_SQR_8_16_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, + REWRITE_TAC[LENGTH_APPEND;BIGNUM_SQR_8_16_CORE_EXEC;BIGNUM_SQR_8_16_EXEC; BARRIER_INST_BYTES_LENGTH] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_DIVIDES_CONV) THEN REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN SUBGOAL_THEN `4 divides (LENGTH bignum_sqr_8_16_core_mc)` (fun th -> REWRITE_TAC[MATCH_MP aligned_bytes_loaded_append th; @@ -694,17 +688,10 @@ let BIGNUM_SQR_8_16_EVENTUALLY_N_AT_PC = prove( (* nonoverlapping *) STRIP_TAC THEN (* Abbreviate pc+8 as pc' because EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC likes pc without offsets *) + ASSERT_NONOVERLAPPING_MODULO_TAC + `nonoverlapping_modulo (2 EXP 64) (val (z:int64), 128) (pc+8, LENGTH bignum_sqr_8_16_mc - 8)` + BIGNUM_SQR_8_16_EXEC THEN ABBREV_TAC `pc'=pc+8` THEN - SUBGOAL_THEN - `nonoverlapping_modulo - (2 EXP 64) (val (z:int64), 128) (pc', LENGTH bignum_sqr_8_16_mc - 8)` - MP_TAC THENL [ - REWRITE_TAC[BIGNUM_SQR_8_16_EXEC] THEN CONV_TAC (ONCE_DEPTH_CONV NUM_SUB_CONV) THEN - EXPAND_TAC "pc'" THEN - NONOVERLAPPING_TAC; ALL_TAC] THEN - DISCH_THEN (fun th -> - let th = REWRITE_RULE[BIGNUM_SQR_8_16_EXEC] th in - ASSUME_TAC (CONV_RULE (ONCE_DEPTH_CONV NUM_SUB_CONV) th)) THEN SUBGOAL_THEN `pc + 8 + LENGTH bignum_sqr_8_16_core_mc = pc' + LENGTH bignum_sqr_8_16_core_mc` MP_TAC THENL [ @@ -715,64 +702,17 @@ let BIGNUM_SQR_8_16_EVENTUALLY_N_AT_PC = prove( let th = REWRITE_RULE[BIGNUM_SQR_8_16_CORE_EXEC] th in REWRITE_TAC [CONV_RULE (ONCE_DEPTH_CONV NUM_REDUCE_CONV) th]) THEN (* now start..! *) - X_GEN_TAC `s0:armstate` THEN GEN_TAC THEN - STRIP_TAC THEN - DISCH_THEN (LABEL_TAC "HEVENTUALLY") THEN - REWRITE_TAC[eventually_n] THEN - CONJ_TAC THENL [ - (** SUBGOAL 1 **) - (* `!s'. steps arm .. s s' ==> P s s'` *) - X_GEN_TAC `s_final:armstate` THEN UNDISCH_LABEL_TAC "HEVENTUALLY" THEN - REPEAT_I_N 0 290 - (fun i -> EVENTUALLY_TAKE_STEP_RIGHT_FORALL_TAC - BIGNUM_SQR_8_16_CORE_EXEC `s0:armstate` 0 i 290 THEN - DISCARD_OLDSTATE_TAC ("s" ^ (if i = 289 then "_final" else string_of_int (i+1)))) THEN - (* match last step: utilize the barrier instruction *) - ONCE_REWRITE_TAC[eventually_CASES] THEN - ASM_REWRITE_TAC[] THEN - STRIP_TAC THEN - (let stuck_th = (REWRITE_RULE[TAUT `(P/\Q/\R==>S) <=> (P==>Q==>R==>S)`] - ALIGNED_BYTES_LOADED_BARRIER_ARM_STUCK) in - FIRST_ASSUM (fun th -> - (* th is 'aligned_bytes_loaded s_final (word_add (word pc') (word 1828)) barrier_inst_bytes' *) - let th = REWRITE_RULE [WORD_RULE `word_add (word x) (word y):int64 = word (x+y)`] th in - let th2 = MATCH_MP stuck_th th in - ASM_MESON_TAC[th2])); + X_GEN_TAC `s0:armstate` THEN GEN_TAC THEN STRIP_TAC THEN + (* eventually ==> eventually_n *) + PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC BIGNUM_SQR_8_16_CORE_EXEC);; - (** SUBGOAL 2 **) - ONCE_REWRITE_TAC[ - ITAUT`(!x y. P y /\ Q x y ==> R x y) <=> (!y. P y ==> !x. Q x y ==> R x y)`] THEN - X_GEN_TAC `n0:num` THEN STRIP_TAC THEN GEN_TAC THEN - REPEAT_I_N 0 290 (fun i -> - let n = mk_var("n" ^ (string_of_int i),`:num`) in - let np1 = mk_var("n" ^ (string_of_int (i+1)),`:num`) in - DISJ_CASES_THEN2 - SUBST1_TAC - (X_CHOOSE_THEN np1 - (fun th -> SUBST_ALL_TAC (REWRITE_RULE[ARITH_RULE`SUC x=1+x`] th))) - (SPEC n num_CASES) THENL [ - (** SUBGOAL 1 **) - SIMPLIFY_STEPS_0_TAC THEN - SOLVE_EXISTS_ARM_TAC (snd (CONJ_PAIR BIGNUM_SQR_8_16_CORE_EXEC)); - - (** SUBGOAL 2 **) - EVENTUALLY_STEPS_EXISTS_STEP_TAC BIGNUM_SQR_8_16_CORE_EXEC i (4*(i+1)) THEN - FIRST_X_ASSUM (fun th -> - let res = MATCH_MP (ARITH_RULE`!x. 1+x x<(n-1)`) th in - ASSUME_TAC (CONV_RULE (ONCE_DEPTH_CONV NUM_REDUCE_CONV) res)) - ]) THEN - ASM_ARITH_TAC (* last is: 'n < 0' *) - ]);; - -let BIGNUM_SQR_8_16_CORE_WITH_BARRIER_CORRECT = - prove_correct_barrier_appended BIGNUM_SQR_8_16_CORE_CORRECT - `bignum_sqr_8_16_core_mc` BIGNUM_SQR_8_16_CORE_EXEC;; let BIGNUM_SQR_8_16_CORE_CORRECT_N = - prove_correct_n [BIGNUM_SQR_8_16_EXEC;BIGNUM_SQR_8_16_CORE_EXEC] - BIGNUM_SQR_8_16_CORE_WITH_BARRIER_CORRECT - BIGNUM_SQR_8_16_EVENTUALLY_N_AT_PC - `\(s:armstate). 1160 DIV 4`;; + prove_correct_n + BIGNUM_SQR_8_16_EXEC + BIGNUM_SQR_8_16_CORE_EXEC + BIGNUM_SQR_8_16_CORE_CORRECT + BIGNUM_SQR_8_16_EVENTUALLY_N_AT_PC;; let BIGNUM_SQR_8_16_NEON_CORRECT = prove( @@ -796,7 +736,8 @@ let BIGNUM_SQR_8_16_NEON_CORRECT = prove( let mc_lengths_th = map (fst o CONJ_PAIR) [BIGNUM_SQR_8_16_EXEC; BIGNUM_SQR_8_16_NEON_EXEC] in REPEAT GEN_TAC THEN - (* Prepare pc for the 'left' program. The left program must not have overwritten x and y. *) + (* Prepare pc for the 'left' program that does not overlap with buffers z and + x. *) SUBGOAL_THEN `?pc. nonoverlapping (z:int64,8 * 16) (word pc,LENGTH bignum_sqr_8_16_mc) /\ @@ -805,102 +746,53 @@ let BIGNUM_SQR_8_16_NEON_CORRECT = prove( (** SUBGOAL 1 **) REWRITE_TAC[LENGTH_APPEND;BIGNUM_SQR_8_16_EXEC;BARRIER_INST_BYTES_LENGTH; NONOVERLAPPING_CLAUSES;ALL] THEN - CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN - MAP_EVERY (fun t -> ASSUME_TAC (SPEC t VAL_BOUND_64)) [`x:int64`;`z:int64`] THEN - MAP_EVERY (fun (v:term) -> - ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num 1984))) THEN - ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num (2*1984))))) - [`val (x:int64)`;`val (z:int64)`] THEN - RULE_ASSUM_TAC (REWRITE_RULE [ARITH_RULE`!x k. ~(x < k) <=> k <= x`]) THEN - TRY ASM_ARITH_TAC (* Remove invalid layouts *) THEN - TRY_CONST_PC_TAC (mk_numeral (num (2 * 1984 + 128))) THEN - TRY_CONST_PC_TAC (mk_numeral (num (1 * 1984 + 128))) THEN - TRY_CONST_PC_TAC (mk_numeral (num (0 * 1984 + 128))); + CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + FIND_HOLE_TAC; (** SUBGOAL 2 **) ALL_TAC ] THEN - (* massage nonoverlapping assumptions *) - REWRITE_TAC (ALL::mc_lengths_th) THEN + REPEAT_N 2 STRIP_TAC THEN - (* Start reasoning *) - MATCH_MP_TAC ENSURES_N_ENSURES THEN - (* TODO: infer this automatically *) - EXISTS_TAC `\(s:armstate). (1476 - 12 - 8) DIV 4` THEN - (* Prepare a 'conjunction' of SPEC and EQUIV *) - MP_TAC ( - let ensures_n_part = - SPECL [`z:int64`;`x:int64`;`a:num`;`pc:num`] - BIGNUM_SQR_8_16_CORE_CORRECT_N in - let equiv_part = - SPECL [`pc:num`;`pc2:num`;`z:int64`;`x:int64`] - BIGNUM_SQR_8_16_EQUIV in - let conj_ensures_n_equiv = CONJ ensures_n_part equiv_part in - MATCH_MP (TAUT`((P==>Q)/\(R==>S)) ==> ((P/\R)==>(Q/\S))`) conj_ensures_n_equiv) THEN - (* Erase 'assumptions' in the spec's nonoverlapping *) - ASM_REWRITE_TAC (ALL::mc_lengths_th) THEN - SUBGOAL_THEN `nonoverlapping (word pc:int64,1180) (z:int64, 8*16)` - (fun th -> REWRITE_TAC[th]) THENL [ - REWRITE_TAC[NONOVERLAPPING_CLAUSES] THEN - RULE_ASSUM_TAC(REWRITE_RULE[NONOVERLAPPING_CLAUSES]) THEN - NONOVERLAPPING_TAC; ALL_TAC] THEN - (* Reduce all number expressions (actually, wanted to target numsteps functions but it is hard *) - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - (* Conjunction of ensures2 and ensures_n *) - DISCH_THEN (fun th -> LABEL_TAC "H" (REWRITE_RULE[] (MATCH_MP ENSURES_N_ENSURES2_CONJ th))) THEN - (* .. and apply H as a precondition of ENSURES2_ENSURES_N *) - REMOVE_THEN "H" (fun th -> - let th2 = MATCH_MP - (REWRITE_RULE [TAUT `(P/\P2/\P3==>Q) <=> P==>P2==>P3==>Q`] ENSURES2_ENSURES_N) th in - MATCH_MP_TAC (REWRITE_RULE [TAUT`(P==>Q==>R) <=> (P/\Q==>R)`] th2)) THEN - REWRITE_TAC[] THEN - (* unravel definitions that may block reasonings *) - RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES]) THEN + + VCGEN_EQUIV_TAC BIGNUM_SQR_8_16_EQUIV BIGNUM_SQR_8_16_CORE_CORRECT_N + [BIGNUM_SQR_8_16_EXEC; BIGNUM_SQR_8_16_NEON_EXEC] THEN + + (* unfold definitions that may block further tactics *) + RULE_ASSUM_TAC (REWRITE_RULE([ALL;NONOVERLAPPING_CLAUSES] @ mc_lengths_th)) THEN + REPEAT SPLIT_FIRST_CONJ_ASSUM_TAC THEN REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN REPEAT CONJ_TAC THENL [ (** SUBGOAL 1. Precond **) X_GEN_TAC `s2:armstate` THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `4 divides val (word pc2:int64)` ASSUME_TAC THENL + [ FIRST_ASSUM (fun th -> + MP_TAC th THEN REWRITE_TAC[DIVIDES_4_VAL_WORD_64;aligned_bytes_loaded_word] + THEN METIS_TAC[]) THEN NO_TAC; ALL_TAC ] THEN ASM_REWRITE_TAC[bignum_sqr_8_16_equiv_input_states] THEN EXISTS_TAC `write (memory :> bytelist (word (pc+8),LENGTH (APPEND bignum_sqr_8_16_core_mc barrier_inst_bytes))) (APPEND bignum_sqr_8_16_core_mc barrier_inst_bytes) (write PC (word (pc+8)) s2)` THEN - ASM_REWRITE_TAC[aligned_bytes_loaded;bytes_loaded] THEN - SUBGOAL_THEN `4 divides val (word (pc+8):int64)` (fun th -> REWRITE_TAC[th]) THENL [ - (** SUBGOAL 1 **) - UNDISCH_TAC `4 divides val (word pc:int64)` THEN - REWRITE_TAC[VAL_WORD;DIMINDEX_64] THEN - (let divth = REWRITE_RULE[EQ_CLAUSES] (NUM_DIVIDES_CONV `4 divides 2 EXP 64`) in - REWRITE_TAC[GSYM (MATCH_MP DIVIDES_MOD2 divth)]) THEN - IMP_REWRITE_TAC[DIVIDES_ADD] THEN - STRIP_TAC THEN CONV_TAC NUM_DIVIDES_CONV; - - (** SUBGOAL 2 **) + SUBGOAL_THEN `aligned_bytes_loaded s2 (word (pc2 + 8):int64) bignum_sqr_8_16_neon_core_mc` + (fun th -> REWRITE_TAC[th]) THENL [ + REWRITE_TAC[bignum_sqr_8_16_neon_core_mc_def] THEN + IMP_REWRITE_TAC[WORD_ADD;ALIGNED_BYTES_LOADED_SUB_LIST] THEN + CONV_TAC NUM_DIVIDES_CONV; + ALL_TAC + ] THEN + SUBGOAL_THEN `4 divides val (word (pc+8):int64)` ASSUME_TAC THENL [ + IMP_REWRITE_TAC[DIVIDES_4_VAL_WORD_ADD_64] THEN CONV_TAC NUM_DIVIDES_CONV; ALL_TAC ] THEN (* Expand variables appearing in the equiv relation *) - REPEAT CONJ_TAC THEN - (* for register updates *) - (TRY (REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN REFL_TAC)) THEN - (* for register updates, with rhses abbreviated *) - (TRY (EXPAND_RHS_TAC THEN REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN REFL_TAC)) THEN - (* for memory updates *) - (TRY (EXPAND_RHS_TAC THEN - REWRITE_TAC[LENGTH_APPEND;BIGNUM_SQR_8_16_CORE_EXEC;BARRIER_INST_BYTES_LENGTH] THEN - READ_OVER_WRITE_ORTHOGONAL_TAC)) THEN - (TRY ((MATCH_MP_TAC READ_OVER_WRITE_MEMORY_APPEND_BYTELIST ORELSE - MATCH_MP_TAC READ_OVER_WRITE_MEMORY_BYTELIST) THEN - REWRITE_TAC[LENGTH_APPEND;BIGNUM_SQR_8_16_CORE_EXEC;BARRIER_INST_BYTES_LENGTH] THEN - ARITH_TAC)) THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_SQR_8_16_CORE_EXEC THEN (* Now has only one subgoal: the equivalence! *) REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN MAP_EVERY EXISTS_TAC [`a:num`] THEN - REPEAT CONJ_TAC THEN - (TRY (EXPAND_RHS_TAC THEN REFL_TAC)) THEN - (TRY (EXPAND_RHS_TAC THEN - REWRITE_TAC[LENGTH_APPEND;BIGNUM_SQR_8_16_CORE_EXEC;BARRIER_INST_BYTES_LENGTH] THEN - READ_OVER_WRITE_ORTHOGONAL_TAC)); + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_SQR_8_16_CORE_EXEC THEN + NO_TAC; (** SUBGOAL 2. Postcond **) MESON_TAC[bignum_sqr_8_16_equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; diff --git a/arm/proofs/decode.ml b/arm/proofs/decode.ml index 97a6caa4..fe51c220 100644 --- a/arm/proofs/decode.ml +++ b/arm/proofs/decode.ml @@ -523,13 +523,16 @@ let decode = new_definition `!w:int32. decode w = SOME (arm_UMOV (WREG' Rd) (QREG' Rn) (val (word_subword imm5 (3,2): 2 word)) 4) else NONE // v.h, v.b are unsupported - | [0:1; 0:1; 0b101110:6; size:2; 1:1; Rm:5; 0b110000:6; Rn:5; Rd:5] -> - // UMULL (vector, Q = 0). UMULL2 (Q = 1) isn't implemented yet. + | [0:1; q; 0b101110:6; size:2; 1:1; Rm:5; 0b110000:6; Rn:5; Rd:5] -> + // UMULL (vector, Q = 0). UMULL2 (vector, Q = 1) if size = (word 0b11:(2)word) then NONE // UNDEFINED else let esize = 8 * (2 EXP val size) in // the bitwidth of src elements // datasize is 64. elements is datasize / esize. - SOME (arm_UMULL_VEC (QREG' Rd) (QREG' Rn) (QREG' Rm) esize) + if q then + SOME (arm_UMULL2_VEC (QREG' Rd) (QREG' Rn) (QREG' Rm) esize) + else + SOME (arm_UMULL_VEC (QREG' Rd) (QREG' Rn) (QREG' Rm) esize) | [0:1; q; 0b001110:6; size:2; 0b0:1; Rm:5; 0b000110:6; Rn:5; Rd:5] -> // UZP1 diff --git a/arm/proofs/equiv.ml b/arm/proofs/equiv.ml index 8415257b..58f0fe8e 100644 --- a/arm/proofs/equiv.ml +++ b/arm/proofs/equiv.ml @@ -14,6 +14,33 @@ needs "common/relational2.ml";; (* Generic lemmas and tactics that are useful *) (* ------------------------------------------------------------------------- *) +let get_bytelist_length (ls:term): int = + try + let th = LENGTH_CONV (mk_comb (`LENGTH:((8)word)list->num`,ls)) in + Num.int_of_num (dest_numeral (rhs (concl th))) + with Failure s -> + failwith (Printf.sprintf + "get_bytelist_length: cannot get the length of `%s`" (string_of_term ls));; + +let rec takedrop (n:int) (l:'a list): 'a list * 'a list = + if n = 0 then ([],l) + else + match l with + | [] -> failwith "l too short" + | h::t -> + let a,b = takedrop (n-1) t in + (h::a,b);; + + +let DIVIDES_4_VAL_WORD_ADD_64 = prove( + `!pc ofs. 4 divides val (word pc:int64) /\ 4 divides ofs ==> + 4 divides val (word (pc+ofs):int64)`, + REWRITE_TAC[VAL_WORD;DIMINDEX_64] THEN + (let divth = REWRITE_RULE[EQ_CLAUSES] (NUM_DIVIDES_CONV `4 divides 2 EXP 64`) in + REWRITE_TAC[GSYM (MATCH_MP DIVIDES_MOD2 divth)]) THEN + IMP_REWRITE_TAC[DIVIDES_ADD]);; + + let EXPAND_RHS_TAC: tactic = fun (asl,g) -> let _,rhs = dest_eq g in @@ -220,7 +247,7 @@ let _ = DIV_EXP_REDUCE_RULE `2 EXP 192 * z + 2 EXP 128 * x + 2 EXP 64 * y` `2 EX (* From val a + k = val a' + k' where k and k' are known to be >= 2 EXP 64, return a = a' /\ k = k'. *) -let PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM = +let PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC = let pth = prove(`!(k:num) (k':num). ((2 EXP 64) divides k) /\ ((2 EXP 64) divides k') ==> (!(a:int64) (a':int64). (val a + k = val a' + k') @@ -275,9 +302,31 @@ let _ = prove(`!(a0:int64) (a1:int64) (a2:int64) val b0 + 2 EXP 64 * val b1 + 2 EXP 128 * val b2 ==> a0 = b0 /\ a1 = b1 /\ a2 = b2`, REPEAT GEN_TAC THEN - DISCH_THEN PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM THEN + DISCH_THEN PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN REPEAT CONJ_TAC THEN REFL_TAC);; +(* A variant of PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC that + (1) works on assumptions, and + (2) equalities must have intermediate variables: + `val a0 + 2 EXP 64 * val a1 + 2 EXP 128 * val a2 = n`, + `val b0 + 2 EXP 64 * val b1 + 2 EXP 128 * val b2 = n`, + ... *) +let ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC:tactic = + let eqeq = METIS[] `!x1 x2 y1 y2 a. (x1+x2=a /\ y1+y2=a) ==> x1+x2=y1+y2` in + REPEAT (FIRST_X_ASSUM (fun th1 -> + FIRST_X_ASSUM (fun th2 -> + PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC + (MATCH_MP eqeq (CONJ th1 th2)))));; + +let _ = prove( + `!(a0:int64) (a1:int64) (a2:int64) n (b0:int64) (b1:int64) (b2:int64). + val a0 + 2 EXP 64 * val a1 + 2 EXP 128 * val a2 = n /\ + val b0 + 2 EXP 64 * val b1 + 2 EXP 128 * val b2 = n + ==> a0 = b0 /\ a1 = b1 /\ a2 = b2`, + REPEAT STRIP_TAC THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN + REFL_TAC);; + (* A simple tactic that introduces `steps arm 0 stname stname` as an assumption. *) let ASSUME_STEPS_ID stname = @@ -300,25 +349,15 @@ let ENSURES2_FRAME_SUBSUMED_TAC = else NO_TAC)) (asl,g)) THEN REWRITE_TAC[GSYM subsumed; ETA_AX] THEN SUBSUMED_MAYCHANGE_TAC;; - -(* A recursive function for defining a conjunction of equality clauses *) -let mk_equiv_regs = define - `((mk_equiv_regs:((armstate,(64)word)component)list->armstate#armstate->bool) - [] s = T) /\ - (mk_equiv_regs (CONS reg regs) (s1,s2) = - ?(a:int64). read reg s1 = a /\ read reg s2 = a /\ - mk_equiv_regs regs (s1,s2))`;; - -let mk_equiv_bool_regs = define - `((mk_equiv_bool_regs:((armstate,bool)component)list->armstate#armstate->bool) - [] s = T) /\ - (mk_equiv_bool_regs (CONS reg regs) (s1,s2) = - ?(a:bool). read reg s1 = a /\ read reg s2 = a /\ - mk_equiv_bool_regs regs (s1,s2))`;; +let ASSERT_NONOVERLAPPING_MODULO_TAC t core_exec = + SUBGOAL_THEN t + (fun th -> ASSUME_TAC (REWRITE_RULE[core_exec] th)) THENL [ + REWRITE_TAC[core_exec] THEN NONOVERLAPPING_TAC; + ALL_TAC];; (* Assumption stash/recovery tactic. *) -let left_prog_state_asms: (string * thm) list ref = ref [];; +let left_prog_state_asms: (string * thm) list list ref = ref [];; let STASH_ASMS_OF_READ_STATES (stnames:string list): tactic = fun (asl,g) -> @@ -329,11 +368,14 @@ let STASH_ASMS_OF_READ_STATES (stnames:string list): tactic = let stname = fst (dest_var itm) in List.exists (fun i -> i = stname) stnames with _ -> false) asl in - left_prog_state_asms := left_prog; + left_prog_state_asms := left_prog::!left_prog_state_asms; ALL_TAC (others,g);; let RECOVER_ASMS_OF_READ_STATES: tactic = - fun (asl,g) -> ALL_TAC (asl @ !left_prog_state_asms, g);; + fun (asl,g) -> + let a = List.hd !left_prog_state_asms in + left_prog_state_asms := List.tl !left_prog_state_asms; + ALL_TAC (asl @ a, g);; let mk_fresh_temp_name = let counter: int ref = ref 0 in @@ -424,173 +466,17 @@ let ALIGNED_BYTES_LOADED_BARRIER_ARM_STUCK = prove( REWRITE_TAC[BARRIER_INST_DECODE_NONE;OPTION_DISTINCT]);; - -(* Take the name of a hypothesis which is 'arm s s2', and expand it to - 'write ... s = s2' and apply thm tactic *) -let EXPAND_ARM_THEN (h_arm_hyp:string) (exec_decode_th:thm) (ttac:thm->tactic):tactic = - REMOVE_THEN h_arm_hyp (fun th -> - (fun (asl,g) -> - let r = ONCE_REWRITE_RULE[ARM_CONV exec_decode_th (map snd asl) (concl th)] in - ttac (r th) (asl,g)));; - -let EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC (h_arm_hyp:string) - (exec_decode_th:thm) (exec_decode_len:thm):tactic = - EXPAND_ARM_THEN h_arm_hyp exec_decode_th MP_TAC THEN - NONSELFMODIFYING_STATE_UPDATE_TAC - (MATCH_MP aligned_bytes_loaded_update exec_decode_len) THEN - NONSELFMODIFYING_STATE_UPDATE_TAC - (MATCH_MP aligned_bytes_loaded_update BARRIER_INST_BYTES_LENGTH) THEN - ASSUMPTION_STATE_UPDATE_TAC THEN - DISCH_TAC;; +(* ------------------------------------------------------------------------- *) +(* Tactics for simulating a program whose postcondition is eventually_n. *) +(* ------------------------------------------------------------------------- *) let PRINT_TAC (s:string): tactic = W (fun (asl,g) -> Printf.printf "%s\n" s; ALL_TAC);; -(* - Add an assumption - `read PC (next_st_var_name:armstate) = word (pc+next_pc_offset)` - *) -let UPDATE_PC_TAC (pc_var_name:string) (next_st_var_name:string) (next_pc_offset:term):tactic = - let next_st_var = mk_var(next_st_var_name, `:armstate`) and - pc_var = mk_var(pc_var_name, `:num`) in - SUBGOAL_THEN (subst [next_st_var,`next_st_var:armstate`;next_pc_offset,`k4p4:num`; - pc_var,`pc:num`] - `read PC next_st_var = word (pc+k4p4)`) - ASSUME_TAC THENL - [(EXPAND_TAC next_st_var_name THEN REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN REFL_TAC) - ORELSE PRINT_TAC "UPDATE_PC_TAC could not prove this goal" THEN PRINT_GOAL_TAC THEN NO_TAC; ALL_TAC];; - -let find_pc_varname (asl:(string * thm)list) (stname:string): string = - let st_var = mk_var (stname,`:armstate`) in - let ls = List.map (fun (_,th) -> - try - let lhs,rhs = dest_eq(concl th) in - let reg,st = dest_binary "read" lhs in - if reg <> `PC` || st <> st_var then None - else - let theword,expr = dest_comb rhs in - if theword <> `word:num->int64` then None - else if is_var expr then Some (fst (dest_var expr)) - else try - let lhs,rhs = dest_binary "+" expr in - Some (fst (dest_var lhs)) - with _ -> - (Printf.printf "Cannot understand `%s`" (string_of_term (concl th)); None) - with _ -> None) asl in - let (Some pcname)::[] = List.filter (fun t -> t <> None) ls in - pcname;; - -(* `read PC s{k} = word (pc + {pc_init_ofs+4k}) - ----- - eventually arm (\s'. read PC s' = word (pc + {pc_init_ofs+4n}) /\ P s{k} s') - s{k} - ==> steps arm {n-k} s{k} s' ==> P s{k} s'` - --> - `read PC s{k} = word (pc + {pc_init_ofs+4k}) - ----- - eventually arm (\s'. read PC s' = word (pc + {pc_init_ofs+4n}) /\ P s{k+1} s') - s{k+1} - ==> steps arm {n-k-1} s{k+1} s' ==> P s{k+1} s'` - *) -let EVENTUALLY_TAKE_STEP_RIGHT_FORALL_TAC - (exec_decode:thm) (init_st_var:term) (pc_init_ofs:int) (k:int) (n:int):tactic = - let exec_decode_len,exec_decode_th = CONJ_PAIR exec_decode and - k4::k4p4::n4::nmk4::nmk::nmkmone4::nmkmone::kpcofs4::k4p4pcofs4::npcofs4::[] = - List.map (fun n -> mk_numeral (num n)) - [k*4; (k+1)*4; n*4; (n-k)*4; n-k; (n-k-1)*4; n-k-1; - pc_init_ofs+k*4;pc_init_ofs+(k+1)*4;pc_init_ofs+n*4] in - let nmk_th = ARITH_RULE - (subst [nmk,`nmk:num`;nmkmone,`nmkmone:num`] `nmk = 1 + nmkmone`) in - - let mk_lt (l:term) (r:term) = - subst [l,`__l__:num`;r,`__r__:num`] `__l__ < __r__` in - let next_st_var_name = "s" ^ (string_of_int (k+1)) in - let next_st_var = mk_var (next_st_var_name,`:armstate`) in - - ONCE_REWRITE_TAC[eventually_CASES] THEN - REWRITE_TAC[nmk_th] THEN - (* PC mismatch *) - ASM_SIMP_TAC[WORD64_NE_ADD;WORD64_NE_ADD2; - ARITH_RULE(mk_lt kpcofs4 npcofs4); - ARITH_RULE(mk_lt npcofs4 `2 EXP 64`)] THEN - ONCE_REWRITE_TAC[STEPS_STEP;STEPS_ONE] THEN - - DISCH_THEN (fun th -> let _,th2 = CONJ_PAIR th in - LABEL_TAC "HEVENTUALLY" th2) THEN - DISCH_THEN (LABEL_TAC "HSTEPS") THEN - (* If HSTEPS is `?s'. arm s s' /\ steps ...`, choose s'. - Otherwise, it is `arm s s_final`; don't touch it *) - TRY (X_CHOOSE_LABEL_TAC next_st_var "HSTEPS") THEN - REMOVE_THEN "HSTEPS" (fun th -> - if is_conj (concl th) then - let th1,th2 = CONJ_PAIR th in - LABEL_TAC "HARM" th1 THEN MP_TAC th2 - else LABEL_TAC "HARM" th) THEN - REMOVE_THEN "HEVENTUALLY" - (fun th -> USE_THEN "HARM" (fun th2 -> MP_TAC (MATCH_MP th th2))) THEN - - (* get explicit definition of the next step *) - EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC "HARM" exec_decode_th exec_decode_len THEN - W (fun (asl,g) -> - (* Find the name of variable 'pc' from `read PC s = word (..pc..)` assumption *) - let pcname = find_pc_varname asl ("s" ^ (string_of_int k)) in - UPDATE_PC_TAC pcname (if k + 1 = n then "s_final" else next_st_var_name) k4p4pcofs4);; - - -(* `[ read PC sk = .. ] - ------ - steps arm (n) sk s' ==> (?s''. arm s' s'')` - --> - `[ read PC sk+1 = .. ] - ------ - steps arm (n-1) sk+1 s' ==> (?s''. arm s' s'') ` - - n is either a constant or an expression '1+x'. - *) -let EVENTUALLY_STEPS_EXISTS_STEP_TAC (exec_decode:thm) (k:int) (next_pc_ofs:int): tactic = - let exec_decode_len,exec_decode_th = CONJ_PAIR exec_decode in - fun (asl,g) -> - let lhs_steps,rhs = dest_imp g in - let nterm = rand(rator(rator(lhs_steps))) in - let snextname = "s" ^ (string_of_int (k+1)) in - let snext = mk_var(snextname, `:armstate`) in - let pcname = find_pc_varname asl ("s" ^ (string_of_int k)) in - - ((if is_numeral nterm then - let nminus1 = (dest_numeral nterm) -/ (num 1) in - GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV) [ - ARITH_RULE - (mk_eq (nterm, mk_binary "+" (`1`, mk_numeral (nminus1))))] - else ALL_TAC) THEN - GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV) [STEPS_STEP] THEN - (* deal with imp lhs: `?s''. arm sk s'' /\ steps arm (n-1) s'' s'` *) - DISCH_THEN (X_CHOOSE_THEN snext - (fun th -> let th_arm, th_steps = CONJ_PAIR th in - LABEL_TAC "HARM" th_arm THEN - MP_TAC th_steps)) THEN - EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC "HARM" exec_decode_th exec_decode_len THEN - UPDATE_PC_TAC pcname snextname (mk_numeral (num next_pc_ofs)) THEN - DISCARD_OLDSTATE_TAC snextname - ) - (asl,g);; - -(* Given a goal with conclusion `steps arm 0 s s' ==> G`, remove lhs and replace s' with s in G. *) -let SIMPLIFY_STEPS_0_TAC: tactic = - DISCH_THEN (fun th -> - REWRITE_TAC[GSYM(REWRITE_RULE[STEPS_TRIVIAL] th)]);; - -(* Prove `?s'. arm s s'`. *) -let SOLVE_EXISTS_ARM_TAC (exec_decode_th:thm): tactic = - (fun (asl,g) -> - let arm_term = snd (strip_exists g) in - (ONCE_REWRITE_TAC[ARM_CONV exec_decode_th (map snd asl) arm_term]) - (asl,g)) THEN - MESON_TAC[];; - (* A variant of ARM_BASIC_STEP_TAC, but - targets eventually_n - preserves 'arm s sname' at assumption *) -let ARM_BASIC_STEP'_TAC2 = +let ARM_BASIC_STEP'2_TAC = let arm_tm = `arm` and arm_ty = `:armstate` in fun execth2 sname (asl,w) -> (* w = `eventually_n _ {stepn} _ {sv}` *) @@ -609,18 +495,9 @@ let ARM_BASIC_STEP'_TAC2 = DISCH_THEN (fun th -> ASSUME_TAC th THEN MP_TAC th) THEN GEN_REWRITE_TAC LAND_CONV [eth]]) (asl,w);; -let rec filter_map f l = - match l with - | [] -> [] - | a::l -> - begin match f a with - | Some x -> x::(filter_map f l) - | None -> filter_map f l - end;; - (* Find `arm sprev sname` and `steps arm n s0 sprev`, and produce `steps arm (n+1) s0 sname` *) -let ARM_ELONGATE_STEPS:string->tactic = +let ARM_ELONGATE_STEPS_TAC:string->tactic = let arm_const = `arm` and arm_ty = `:armstate` in let steps_arm_term = `steps arm` in let sprev_term = mk_var("sprev", arm_ty) in @@ -634,7 +511,7 @@ let ARM_ELONGATE_STEPS:string->tactic = let s = mk_var(sname,arm_ty) in let arm_term = mk_comb(mk_comb(arm_const,sprev_term),s) in (* 1. Find 'arm sprev s' where s is already given *) - let arm_assums = filter_map (fun (_,assum) -> + let arm_assums = List.filter_map (fun (_,assum) -> try let i:instantiation = term_match [s] arm_term (concl assum) in Some (find_mapped_term i sprev_term, assum) @@ -644,7 +521,7 @@ let ARM_ELONGATE_STEPS:string->tactic = begin let steps_term = mk_comb(mk_comb(mk_comb(steps_arm_term,n_term),s0_term),sprev) in (* 2. Find 'steps arm n s0 sprev' where sprev is given *) - let steps_assums = filter_map (fun (_,assum) -> + let steps_assums = List.filter_map (fun (_,assum) -> try let i = term_match [sprev] steps_term (concl assum) in Some (find_mapped_term i n_term, find_mapped_term i s0_term, assum) @@ -679,9 +556,9 @@ let ARM_STEP'_TAC (execth::subths) sname (store_update_to:thm ref option) = (*** This does the basic decoding setup ***) - ARM_BASIC_STEP'_TAC2 execth2 sname THEN + ARM_BASIC_STEP'2_TAC execth2 sname THEN (* Elongate 'steps arm n ..' to 'steps arm (n+1) ..' *) - ARM_ELONGATE_STEPS sname THEN + ARM_ELONGATE_STEPS_TAC sname THEN (*** This part shows the code isn't self-modifying ***) @@ -790,60 +667,70 @@ let ENSURES_FINAL_STATE'_TAC = NO_TAC])));; -(* Given eq = `read c s = e`, abbreviate e as a fresh variable. - Also, if there is another read `read c st' = e'`, try to - solve `e' = e` using WORD_RULE and rewrite all e' with e in - assumptions. +(* Given eqs = (`read c s = e1`, `read c' s' = e2`), + prove e1 = e2 using WORD_RULE, and abbreviate e1 and e2 as a + fresh variable. *) -let ABBREV_READ_RHS_TAC (eq:term) (st':term):tactic = +let ABBREV_READS_TAC (eqs:term*term):tactic = W(fun (asl,g) -> + let eq,eq2 = eqs in if not (is_eq eq) then ALL_TAC else (* eq is: `read elem s = e` *) let lhs,rhs = dest_eq eq in (* If lhs is PC update, don't abbrevate it *) if (can (term_match [] `read PC s`) lhs) then ALL_TAC else - (* If rhs is already a variable, don't abbreviate it again *) + (* If rhs is already a variable, don't abbreviate it again. + Don't try to prove the rhs of eq2. *) if is_var rhs then ALL_TAC else let vname = mk_fresh_temp_name() in Printf.printf "Abbreviating `%s` (which is `%s`) as \"%s\"..\n" (string_of_term rhs) (string_of_term lhs) vname; - (* If `read elem s_left = e` does not appear in assumption, - raise a warning because the lock-stepping instructions are - not equivalent. *) - let left_prog_read = mk_comb (fst (dest_comb lhs), st') in - begin match (List.filter - (fun (_,t') -> let tt = concl t' in is_eq tt && fst (dest_eq tt) = left_prog_read) asl) with - | [] -> - Printf.printf "\t- Warning: could not find `%s = *`\n" (string_of_term left_prog_read); - ALL_TAC - | h::[] -> - let h = concl (snd h) in - let hlhs,hrhs = dest_eq h in - if hrhs <> rhs then - try - let r = WORD_RULE (mk_eq(hrhs,rhs)) in - Printf.printf "\t- Abbreviating `%s` as \"%s\" as well\n" - (string_of_term hrhs) vname; - RULE_ASSUM_TAC (REWRITE_RULE[r]) - with _ -> - Printf.printf "\t- Warning: could not find `%s = %s`, but `%s`\n" - (string_of_term hlhs) (string_of_term rhs) (string_of_term h); - ALL_TAC - else ALL_TAC - | _ -> - Printf.printf "\t- Warning: found more than one `%s = *`\n" - (string_of_term left_prog_read); - ALL_TAC - end THEN + + let lhs2,rhs2 = dest_eq eq2 in + (if rhs2 = rhs then ALL_TAC else + try + let r = WORD_RULE (mk_eq(rhs2,rhs)) in + Printf.printf "\t- Abbreviating `%s` as \"%s\" as well\n" + (string_of_term rhs2) vname; + RULE_ASSUM_TAC (REWRITE_RULE[r]) + with _ -> + Printf.printf "\t- Error: WORD_RULE could not prove `%s = %s`\n" + (string_of_term rhs2) (string_of_term rhs); + failwith "ABBREV_READS_TAC") THEN let fresh_var = mk_var (vname,type_of rhs) in ABBREV_TAC (mk_eq (fresh_var,rhs)));; + + +(* ------------------------------------------------------------------------- *) +(* Tactics and definitions for proving program equivalence. *) +(* ------------------------------------------------------------------------- *) + +(* A recursive function for defining a conjunction of equality clauses *) +let mk_equiv_regs = define + `((mk_equiv_regs:((armstate,(64)word)component)list->armstate#armstate->bool) + [] s = T) /\ + (mk_equiv_regs (CONS reg regs) (s1,s2) = + ?(a:int64). read reg s1 = a /\ read reg s2 = a /\ + mk_equiv_regs regs (s1,s2))`;; + +let mk_equiv_bool_regs = define + `((mk_equiv_bool_regs:((armstate,bool)component)list->armstate#armstate->bool) + [] s = T) /\ + (mk_equiv_bool_regs (CONS reg regs) (s1,s2) = + ?(a:bool). read reg s1 = a /\ read reg s2 = a /\ + mk_equiv_bool_regs regs (s1,s2))`;; + (* A lock-step simulation. - This also abbreviates the new expressions appearing on the new state - expressions of the right-side program, and forgets abbreviations that were - used in the past. *) + This abbreviates the new expression(s) appearing on the new state + expression(s) of the right-side program, and checks whether + new expression(s) appearing on the left-side program are equivalent + to it. + + It forgets abbreviations that were used in the past. *) let ARM_LOCKSTEP_TAC = - let update_eqs_rhs: thm ref = ref (TAUT `T`) in + let update_eqs_prog1: thm ref = ref (TAUT `T`) in + let update_eqs_prog2: thm ref = ref (TAUT `T`) in fun (execth:thm) (execth':thm) (snum:int) (snum':int) (stname'_suffix:string) -> let new_stname = "s" ^ (string_of_int snum) in let new_st = mk_var (new_stname,`:armstate`) and @@ -857,22 +744,37 @@ let ARM_LOCKSTEP_TAC = Printf.printf "ARM_LOCKSTEP_TAC (%d,%d)\n" snum snum'; let cur_stname' = name_of (rand (snd ((dest_abs o rand o rator) g))) in STASH_ASMS_OF_READ_STATES [cur_stname'] (asl,g)) THEN - ARM_STEP'_TAC (execth::[]) new_stname None THEN + ARM_STEP'_TAC (execth::[]) new_stname (Some update_eqs_prog1) THEN DISCARD_OLDSTATE'_TAC [new_stname] false THEN RECOVER_ASMS_OF_READ_STATES THEN (* 2. One step on the right program. *) MATCH_MP_TAC EVENTUALLY_N_SWAP THEN STASH_ASMS_OF_READ_STATES [new_stname] THEN - ARM_STEP'_TAC (execth'::[]) new_stname' (Some update_eqs_rhs) THEN + ARM_STEP'_TAC (execth'::[]) new_stname' (Some update_eqs_prog2) THEN DISCARD_OLDSTATE'_TAC [new_stname'] true(*remove assumptions using old abbrevs*) THEN RECOVER_ASMS_OF_READ_STATES THEN MATCH_MP_TAC EVENTUALLY_N_SWAP THEN (* 3. Abbreviate expressions that appear in the new state expressions created from step 2. *) W (fun (asl,g) -> - MAP_EVERY - (fun (eqth:thm) -> ABBREV_READ_RHS_TAC (concl eqth) new_st) - (CONJUNCTS !update_eqs_rhs));; + let update_eqs_prog1_list = CONJUNCTS !update_eqs_prog1 in + let update_eqs_prog2_list = CONJUNCTS !update_eqs_prog2 in + if List.length update_eqs_prog1_list <> + List.length update_eqs_prog2_list + then + (Printf.printf "Updated components mismatch:\n"; + Printf.printf "\tprog1: "; + print_qterm (concl !update_eqs_prog1); + Printf.printf "\n\tprog2: "; + print_qterm (concl !update_eqs_prog2); + failwith "ARM_LOCKSTEP_TAC") + else + let eqs = zip + (map concl update_eqs_prog1_list) + (map concl update_eqs_prog2_list) in + MAP_EVERY + (fun (eq1,eq2) -> ABBREV_READS_TAC (eq1,eq2)) + eqs);; let EQUIV_INITIATE_TAC input_equiv_states_th = @@ -920,6 +822,9 @@ let ARM_STUTTER_RIGHT_TAC (exec_th:thm) (snames:int list) (st_suffix:string): ta RECOVER_ASMS_OF_READ_STATES THEN MATCH_MP_TAC EVENTUALLY_N_SWAP);; +(* Tactics that simulate two partially different programs. + Instructions are considered equivalent if they are alpha-equivalent. *) + let EQUIV_STEP_TAC action (execth1:thm) (execth2:thm): tactic = match action with | ("equal",lstart,lend,rstart,rend) -> @@ -944,10 +849,255 @@ let EQUIV_STEP_TAC action (execth1:thm) (execth2:thm): tactic = ORELSE (PRINT_TAC "replace failed: stuttering right" THEN PRINT_GOAL_TAC THEN NO_TAC))) | (s,_,_,_,_) -> failwith ("Unknown action: " ^ s);; +let EQUIV_STEPS_TAC actions (execth1:thm) (execth2:thm): tactic = + MAP_EVERY + (fun action -> EQUIV_STEP_TAC action execth1 execth2) + actions;; +(* Given eq = (`read c s = rhs`), abbreviate e as a fresh variable. + and push this at append_to. + append_to is a list of `rhs = fresh_var` equalities. +*) +let ABBREV_READ_TAC (eq:term) (append_to:thm list ref):tactic = + W(fun (asl,g) -> + if not (is_eq eq) then + (Printf.printf "ABBREV_READ_TAC: not equality, passing..: `%s`\n" + (string_of_term eq); + ALL_TAC) else + (* eq is: `read elem s = e` *) + let lhs,rhs = dest_eq eq in + (* If lhs is PC update, don't abbrevate it *) + if (can (term_match [] `read PC s`) lhs) then ALL_TAC else + let vname = mk_fresh_temp_name() in + Printf.printf "Abbreviating `%s` (which is `%s`) as \"%s\"..\n" + (string_of_term rhs) (string_of_term lhs) vname; -let prove_correct_barrier_appended (correct_th:thm) (core_mc:term) (core_exec_th:thm): thm = + let fresh_var = mk_var (vname,type_of rhs) in + ABBREV_TAC (mk_eq (fresh_var,rhs)) THEN + (fun (asl,g) -> + append_to := (snd (List.hd asl))::!append_to; + ALL_TAC (asl,g)));; + +(* Simulate an instruction of the left program and assign fresh variables + to the RHSes of new state equations (`read c s = RHS`). + store_to is a list of `RHS = assigned_fresh_var` theorems. +*) + +let ARM_STEP'_AND_ABBREV_TAC = + let update_eqs_prog: thm ref = ref (TAUT `T`) in + fun (execth:thm) (new_stname) (store_to:thm list ref) -> + (* Stash the right program's state equations first *) + (fun (asl,g) -> + let cur_stname' = name_of (rand (snd ((dest_abs o rand o rator) g))) in + STASH_ASMS_OF_READ_STATES [cur_stname'] (asl,g)) THEN + (* One step on the left program *) + ARM_STEP'_TAC (execth::[]) new_stname (Some update_eqs_prog) THEN + DISCARD_OLDSTATE'_TAC [new_stname] false THEN + RECOVER_ASMS_OF_READ_STATES THEN + (* Abbreviate RHSes of the new state equations *) + W (fun (asl,g) -> + let update_eqs_prog_list = CONJUNCTS !update_eqs_prog in + MAP_EVERY + (fun th -> ABBREV_READ_TAC (concl th) store_to) + update_eqs_prog_list);; + +let ARM_STEPS'_AND_ABBREV_TAC (execth:thm) (snums:int list) + (store_to: (int * thm) list ref):tactic = + MAP_EVERY + (fun n -> + let stname = "s" ^ (string_of_int n) in + let store_to_n = ref [] in + ARM_STEP'_AND_ABBREV_TAC execth stname store_to_n THEN + (fun (asl,g) -> + store_to := (map (fun x -> (n,x)) !store_to_n) @ !store_to; + ALL_TAC (asl,g))) + snums;; + +let get_read_component (eq:term): term = + let lhs = fst (dest_eq eq) in + rand (rator lhs);; + +let _ = get_read_component `read X1 s = word 0`;; + +(* For the right program *) +let ARM_STEPS'_AND_REWRITE_TAC (execth:thm) (snums:int list) (inst_map: int list) + (abbrevs: (int * thm) list ref):tactic = + MAP_EVERY + (fun n -> + let stname = "s'" ^ (string_of_int n) in + let new_state_eq = ref (REFL `T`) in + (* Stash the left program's state equations first *) + (fun (asl,g) -> + let cur_stname = name_of (rand g) in + STASH_ASMS_OF_READ_STATES [cur_stname] (asl,g)) THEN + MATCH_MP_TAC EVENTUALLY_N_SWAP THEN + ARM_STEP'_TAC (execth::[]) stname (Some new_state_eq) THEN + DISCARD_OLDSTATE'_TAC [stname] false THEN + MATCH_MP_TAC EVENTUALLY_N_SWAP THEN + RECOVER_ASMS_OF_READ_STATES THEN + (fun (asl,g) -> + let n_at_lprog = List.nth inst_map (n-1) in + let abbrevs_for_st_n = List.filter (fun (n',t)->n'=n_at_lprog) !abbrevs in + let new_state_eqs = CONJUNCTS !new_state_eq in + (* filter out read PC *) + let new_state_eqs = List.filter + (fun th -> not (can (term_match [] `read PC s`) (fst (dest_eq (concl th))))) + new_state_eqs in + if List.length abbrevs_for_st_n = List.length new_state_eqs then + (* `read c sn = rhs` <=> `read c sn = abbrev` *) + let rewrite_rules = List.filter_map + (fun new_state_eq -> + let rhs = snd (dest_eq (concl new_state_eq)) in + match List.find_opt + (fun (_,th') -> fst (dest_eq (concl th')) = rhs) + abbrevs_for_st_n with + | Some (_,rhs_to_abbrev) -> + let th' = SPEC rhs EQ_REFL in + Some (GEN_REWRITE_RULE RAND_CONV [rhs_to_abbrev] th') + | None -> (* This case happens when new_state_eq already has abbreviated RHS *) + None) + new_state_eqs in + RULE_ASSUM_TAC(REWRITE_RULE rewrite_rules) (asl,g) + else + (Printf.printf "State number %d: length mismatch: %d <> %d\n" + n (List.length new_state_eqs) (List.length abbrevs_for_st_n); + Printf.printf " new state eq:\n"; + List.iter (fun t -> Printf.printf " %s\n" (string_of_term (concl t))) new_state_eqs; + failwith "ARM_STEPS'_AND_REWRITE_TAC"))) + snums;; + + +(* An ad-hoc tactic for proving a goal + `read c1 s = .. /\ read c2 s = .. /\ ...`. This also accepts + a clause which is a predicate 'aligned_bytes_loaded'. + Clauses which cannot not be proven with this tactic will remain as a goal. *) +let PROVE_CONJ_OF_EQ_READS_TAC (execth:thm) = + REPEAT CONJ_TAC THEN + TRY ( + (* for register updates *) + (REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN REFL_TAC) ORELSE + (* for register updates, with rhses abbreviated *) + (EXPAND_RHS_TAC THEN REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN REFL_TAC) + ORELSE + (* for memory updates *) + (ASM_REWRITE_TAC[aligned_bytes_loaded;bytes_loaded] THEN + EXPAND_RHS_TAC THEN + REWRITE_TAC[LENGTH_APPEND;execth;BARRIER_INST_BYTES_LENGTH] THEN + READ_OVER_WRITE_ORTHOGONAL_TAC) ORELSE + (ASM_REWRITE_TAC[aligned_bytes_loaded;bytes_loaded] THEN + (MATCH_MP_TAC READ_OVER_WRITE_MEMORY_APPEND_BYTELIST ORELSE + MATCH_MP_TAC READ_OVER_WRITE_MEMORY_BYTELIST) THEN + REWRITE_TAC[LENGTH_APPEND;execth;BARRIER_INST_BYTES_LENGTH] THEN + ARITH_TAC));; + + + +(* Prove goals like + `?pc. nonoverlapping_modulo (2 EXP 64) (pc,36) (val addr_out,32) /\ + nonoverlapping_modulo (2 EXP 64) (pc,36) (val addr_in,32) /\ + 4 divides val (word pc)` + by instantiating pc *) +let TRY_CONST_PC_TAC (pc:term):tactic = + TRY (EXISTS_TAC pc THEN + REPEAT CONJ_TAC THEN + TRY (MATCH_MP_TAC NONOVERLAPPING_MODULO_SIMPLE THEN ASM_ARITH_TAC) THEN + REWRITE_TAC[VAL_WORD;DIMINDEX_64;DIVIDES_DIV_MULT] THEN ARITH_TAC);; + +(* Prove goals like + `?pc. nonoverlapping_modulo (2 EXP 64) (pc,36) (val addr_out,32) /\ + nonoverlapping_modulo (2 EXP 64) (pc,36) (val addr_in,32) /\ + 4 divides val (word pc)` + by finding a 'hole' from the memory layout which can place (pc,36). *) +let FIND_HOLE_TAC: tactic = + fun (asl,g) -> + (* Sanity-check the goal *) + let pcvar, goal_body = dest_exists g in + let goal_conjs = conjuncts goal_body in + let terms_nonoverlap, term_divides = (butlast goal_conjs, last goal_conjs) in + if fst (strip_comb term_divides) <> `divides` + then failwith ("Not 'divides' predicate: " ^ (string_of_term term_divides)) else + let ranges:(term*int) list = List.concat_map + (fun t -> + let tt,ranges = strip_comb t in + if tt <> `nonoverlapping_modulo` then + failwith ("Not 'nonoverlapping_modulo' predicate: " ^ (string_of_term t)) + else + let dest_range (ti:term) = + if not (is_pair ti) + then failwith ("Not a pair: " ^ (string_of_term ti)) else + let ptr,size = dest_pair ti in + if not (is_numeral size) + then failwith ("Has non-constant size: " ^ (string_of_term t)) else + (ptr, dest_small_numeral size) in + List.map dest_range (List.tl ranges)) + terms_nonoverlap in + let ranges = uniq (sort (<) ranges) in + Printf.printf "Aggregated ranges: %s\n" + (String.concat ", " (List.map + (fun (t,n) -> Printf.sprintf "(%s,%d)" (string_of_term t) n) ranges)); + (* prepare val X < 2 EXP 64 *) + let val_word_ptrs = List.map fst (List.filter + (fun (t,_) -> fst (strip_comb t) = `val:int64 -> num`) + ranges) in + let word_ptrs = List.map (fun t -> snd (dest_comb t)) val_word_ptrs in + Printf.printf "Base pointers (except PC): %s\n" + (String.concat ", " (List.map string_of_term word_ptrs)); + let val_bound_prepare_tac = + MAP_EVERY (fun t -> ASSUME_TAC (SPEC t VAL_BOUND_64)) word_ptrs in + (* Now prepare case split. *) + let segsize = List.fold_left (+) 0 (List.map snd ranges) in + let maxlen = List.fold_left max 0 (List.map snd + (List.filter (fun (t,n) -> t <> pcvar) ranges)) in + Printf.printf "segsize: %d, maxlen: %d\n%!\n" segsize maxlen; + + (* MAP_EVERY (fun (v:term) -> + ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num segsize))) THEN + ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num (2*segsize))))) + [`val (addr_in:int64)`;`val (addr_out:int64)`] *) + let cases_tac = MAP_EVERY (fun (v:term) -> + List.fold_left then_ ALL_TAC + (List.map + (fun i -> ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num (i * segsize))))) + (1--(List.length word_ptrs)))) + val_word_ptrs in + + (* Invoke TRY_CONST_PC_TAC to try each hole. *) + let try_holes = List.fold_left then_ ALL_TAC + (List.map + (fun i -> TRY_CONST_PC_TAC (mk_numeral (num (i * segsize + maxlen)))) + (0--(List.length word_ptrs))) in + + ((val_bound_prepare_tac THEN + cases_tac THEN + RULE_ASSUM_TAC (REWRITE_RULE [ARITH_RULE`!x k. ~(x < k) <=> k <= x`]) THEN + TRY ASM_ARITH_TAC (* Remove invalid layouts *) THEN + try_holes THEN + PRINT_GOAL_TAC THEN NO_TAC) + ORELSE FAIL_TAC "Has unresolved goals") (asl,g);; + + +(* ------------------------------------------------------------------------- *) +(* Functions that convert a specification theorem into a different form. *) +(* ------------------------------------------------------------------------- *) + +let to_ensures_n (ensures_form:term) (numsteps_fn:term): term = + let g = ensures_form in + let g_quants,g_imp = strip_forall g in + let g_asms,g_ensures = dest_imp g_imp in + let g_ensures,g_ensures_frame = dest_comb g_ensures in + let g_ensures,g_ensures_post = dest_comb g_ensures in + let _,g_ensures_pre = dest_comb g_ensures in + let g_ensures_n = subst [g_ensures_pre,`P:armstate->bool`; + g_ensures_post,`Q:armstate->bool`; + g_ensures_frame,`Fr:armstate->armstate->bool`; + numsteps_fn,`fn:armstate->num`] + `ensures_n arm P Q Fr fn` in + list_mk_forall (g_quants,mk_imp(g_asms,g_ensures_n));; + +let prove_correct_barrier_appended (correct_th:thm) (core_exec_th:thm): thm = + (* core_exec_th = `LENGTH core_mc = .. /\ (...)` *) + let core_mc = snd (dest_comb (fst (dest_eq (concl (fst (CONJ_PAIR core_exec_th)))))) in let core_mc_with_barrier = mk_binop `APPEND:((8)word)list->((8)word)list->((8)word)list` core_mc `barrier_inst_bytes` in @@ -968,22 +1118,10 @@ let prove_correct_barrier_appended (correct_th:thm) (core_mc:term) (core_exec_th ] THEN ASM_MESON_TAC[aligned_bytes_loaded_append]);; -let to_ensures_n (ensures_form:term) (numsteps_fn:term): term = - let g = ensures_form in - let g_quants,g_imp = strip_forall g in - let g_asms,g_ensures = dest_imp g_imp in - let g_ensures,g_ensures_frame = dest_comb g_ensures in - let g_ensures,g_ensures_post = dest_comb g_ensures in - let _,g_ensures_pre = dest_comb g_ensures in - let g_ensures_n = subst [g_ensures_pre,`P:armstate->bool`; - g_ensures_post,`Q:armstate->bool`; - g_ensures_frame,`Fr:armstate->armstate->bool`; - numsteps_fn,`fn:armstate->num`] - `ensures_n arm P Q Fr fn` in - list_mk_forall (g_quants,mk_imp(g_asms,g_ensures_n));; - -let prove_correct_n (execths:thm list) (correct_th:thm) (event_n_at_pc_th:thm) (numsteps_fn:term): thm = - let to_eventually_th = REWRITE_RULE execths event_n_at_pc_th in +let prove_correct_n (execth:thm) (core_execth:thm) (correct_th:thm) + (event_n_at_pc_th:thm): thm = + let correct_th = prove_correct_barrier_appended correct_th core_execth in + let to_eventually_th = REWRITE_RULE [execth;core_execth] event_n_at_pc_th in let to_eventually_th = CONV_RULE (ONCE_DEPTH_CONV NUM_REDUCE_CONV) to_eventually_th in let to_eventually_th = REWRITE_RULE[ eventually_n_at_pc; @@ -996,6 +1134,14 @@ let prove_correct_n (execths:thm list) (correct_th:thm) (event_n_at_pc_th:thm) ( TAUT `(P==>(!x. Q x)) <=> (!x. P==>Q x)`; TAUT`(P==>Q==>R)<=>(P/\Q==>R)`; GSYM CONJ_ASSOC] eventually_form in + let nsteps = + let body = snd (strip_forall (concl event_n_at_pc_th)) in + let body = if is_imp body then snd (dest_imp body) else body in + let pred,args = strip_comb body in + if pred <> `eventually_n_at_pc` + then failwith "eventually_n_at_pc expected" + else List.nth args 4 in + let numsteps_fn = mk_abs (`s:armstate`,nsteps) in prove(to_ensures_n (concl correct_th) numsteps_fn, (* Reduce the step function *) CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN @@ -1008,12 +1154,380 @@ let prove_correct_n (execths:thm list) (correct_th:thm) (event_n_at_pc_th:thm) ( (string_of_thm eventually_form) ^ "`") THEN PRINT_GOAL_TAC THEN NO_TAC)));; -(* Prove goals like - `?pc. nonoverlapping_modulo (2 EXP 64) (pc,36) (val addr_out,32) /\ - nonoverlapping_modulo (2 EXP 64) (pc,36) (val addr_in,32) /\ - 4 divides val (word pc)` *) -let TRY_CONST_PC_TAC (pc:term):tactic = - TRY (EXISTS_TAC pc THEN - REPEAT CONJ_TAC THEN - TRY (MATCH_MP_TAC NONOVERLAPPING_MODULO_SIMPLE THEN ASM_ARITH_TAC) THEN - REWRITE_TAC[VAL_WORD;DIMINDEX_64;DIVIDES_DIV_MULT] THEN ARITH_TAC);; + +(* ------------------------------------------------------------------------- *) +(* Tactics for simulating a program when the goal to prove is *) +(* created by mk_eventually_n_at_pc_statement. *) +(* ------------------------------------------------------------------------- *) + +(* Given a goal with conclusion `steps arm 0 s s' ==> G`, remove lhs and + replace s' with s in G. *) +let SIMPLIFY_STEPS_0_TAC: tactic = + DISCH_THEN (fun th -> + REWRITE_TAC[GSYM(REWRITE_RULE[STEPS_TRIVIAL] th)]);; + +let find_pc_varname (asl:(string * thm)list) (stname:string): string = + let st_var = mk_var (stname,`:armstate`) in + let pcname::[] = List.filter_map (fun (_,th) -> + try + let lhs,rhs = dest_eq(concl th) in + let reg,st = dest_binary "read" lhs in + if reg <> `PC` || st <> st_var then None + else + let theword,expr = dest_comb rhs in + if theword <> `word:num->int64` then None + else if is_var expr then Some (fst (dest_var expr)) + else try + let lhs,rhs = dest_binary "+" expr in + Some (fst (dest_var lhs)) + with _ -> + (Printf.printf "Cannot understand `%s`" (string_of_term (concl th)); None) + with _ -> None) asl in + pcname;; + +(* Take the name of a hypothesis which is 'arm s s2', and expand it to + 'write ... s = s2' and apply thm tactic *) +let EXPAND_ARM_THEN (h_arm_hyp:string) (exec_decode_th:thm) (ttac:thm->tactic):tactic = + REMOVE_THEN h_arm_hyp (fun th -> + (fun (asl,g) -> + let r = ONCE_REWRITE_RULE[ARM_CONV exec_decode_th (map snd asl) (concl th)] in + ttac (r th) (asl,g)));; + +let EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC (h_arm_hyp:string) + (exec_decode_th:thm) (exec_decode_len:thm):tactic = + EXPAND_ARM_THEN h_arm_hyp exec_decode_th MP_TAC THEN + NONSELFMODIFYING_STATE_UPDATE_TAC + (MATCH_MP aligned_bytes_loaded_update exec_decode_len) THEN + NONSELFMODIFYING_STATE_UPDATE_TAC + (MATCH_MP aligned_bytes_loaded_update BARRIER_INST_BYTES_LENGTH) THEN + ASSUMPTION_STATE_UPDATE_TAC THEN + DISCH_TAC;; + +(* Prove `?s'. arm s s'`. *) +let SOLVE_EXISTS_ARM_TAC (exec_decode_th:thm): tactic = + (fun (asl,g) -> + let arm_term = snd (strip_exists g) in + (ONCE_REWRITE_TAC[ARM_CONV exec_decode_th (map snd asl) arm_term]) + (asl,g)) THEN + MESON_TAC[];; + +(* + Add an assumption + `read PC (next_st_var_name:armstate) = word (pc+next_pc_offset)` + *) +let UPDATE_PC_TAC (pc_var_name:string) (next_st_var_name:string) (next_pc_offset:term):tactic = + let next_st_var = mk_var(next_st_var_name, `:armstate`) and + pc_var = mk_var(pc_var_name, `:num`) in + SUBGOAL_THEN (subst [next_st_var,`next_st_var:armstate`;next_pc_offset,`k4p4:num`; + pc_var,`pc:num`] + `read PC next_st_var = word (pc+k4p4)`) + ASSUME_TAC THENL + [(EXPAND_TAC next_st_var_name THEN REPEAT COMPONENT_READ_OVER_WRITE_LHS_TAC THEN REFL_TAC) + ORELSE PRINT_TAC "UPDATE_PC_TAC could not prove this goal" THEN PRINT_GOAL_TAC THEN NO_TAC; ALL_TAC];; + +(* `read PC s{k} = word (pc + {pc_init_ofs+4k}) + ----- + eventually arm (\s'. read PC s' = word (pc + {pc_init_ofs+4n}) /\ P s{k} s') + s{k} + ==> steps arm {n-k} s{k} s' ==> P s{k} s'` + --> + `read PC s{k} = word (pc + {pc_init_ofs+4k}) + ----- + eventually arm (\s'. read PC s' = word (pc + {pc_init_ofs+4n}) /\ P s{k+1} s') + s{k+1} + ==> steps arm {n-k-1} s{k+1} s' ==> P s{k+1} s'` + *) +let EVENTUALLY_TAKE_STEP_RIGHT_FORALL_TAC + (exec_decode:thm) (init_st_var:term) (pc_init_ofs:int) (k:int) (n:int):tactic = + let exec_decode_len,exec_decode_th = CONJ_PAIR exec_decode and + k4::k4p4::n4::nmk4::nmk::nmkmone4::nmkmone::kpcofs4::k4p4pcofs4::npcofs4::[] = + List.map (fun n -> mk_numeral (num n)) + [k*4; (k+1)*4; n*4; (n-k)*4; n-k; (n-k-1)*4; n-k-1; + pc_init_ofs+k*4;pc_init_ofs+(k+1)*4;pc_init_ofs+n*4] in + let nmk_th = ARITH_RULE + (subst [nmk,`nmk:num`;nmkmone,`nmkmone:num`] `nmk = 1 + nmkmone`) in + + let mk_lt (l:term) (r:term) = + subst [l,`__l__:num`;r,`__r__:num`] `__l__ < __r__` in + let next_st_var_name = "s" ^ (string_of_int (k+1)) in + let next_st_var = mk_var (next_st_var_name,`:armstate`) in + + (fun (asl,g) -> + if !arm_print_log then + (Printf.printf "\n"; + PRINT_GOAL_TAC (asl,g)) + else ALL_TAC (asl,g)) THEN + ONCE_REWRITE_TAC[eventually_CASES] THEN + REWRITE_TAC[nmk_th] THEN + (* PC mismatch *) + ASM_SIMP_TAC[WORD64_NE_ADD;WORD64_NE_ADD2; + ARITH_RULE(mk_lt kpcofs4 npcofs4); + ARITH_RULE(mk_lt npcofs4 `2 EXP 64`)] THEN + ONCE_REWRITE_TAC[STEPS_STEP;STEPS_ONE] THEN + + DISCH_THEN (fun th -> let _,th2 = CONJ_PAIR th in + LABEL_TAC "HEVENTUALLY" th2) THEN + DISCH_THEN (LABEL_TAC "HSTEPS") THEN + (* If HSTEPS is `?s'. arm s s' /\ steps ...`, choose s'. + Otherwise, it is `arm s s_final`; don't touch it *) + TRY (X_CHOOSE_LABEL_TAC next_st_var "HSTEPS") THEN + REMOVE_THEN "HSTEPS" (fun th -> + if is_conj (concl th) then + let th1,th2 = CONJ_PAIR th in + LABEL_TAC "HARM" th1 THEN MP_TAC th2 + else LABEL_TAC "HARM" th) THEN + REMOVE_THEN "HEVENTUALLY" + (fun th -> USE_THEN "HARM" (fun th2 -> MP_TAC (MATCH_MP th th2))) THEN + + (* get explicit definition of the next step *) + EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC "HARM" exec_decode_th exec_decode_len THEN + W (fun (asl,g) -> + (* Find the name of variable 'pc' from `read PC s = word (..pc..)` assumption *) + let pcname = find_pc_varname asl ("s" ^ (string_of_int k)) in + UPDATE_PC_TAC pcname (if k + 1 = n then "s_final" else next_st_var_name) k4p4pcofs4);; + +(* `[ read PC sk = .. ] + ------ + steps arm (n) sk s' ==> (?s''. arm s' s'')` + --> + `[ read PC sk+1 = .. ] + ------ + steps arm (n-1) sk+1 s' ==> (?s''. arm s' s'') ` + + n is either a constant or an expression '1+x'. + *) +let EVENTUALLY_STEPS_EXISTS_STEP_TAC (exec_decode:thm) (k:int) (next_pc_ofs:int): tactic = + let exec_decode_len,exec_decode_th = CONJ_PAIR exec_decode in + fun (asl,g) -> + let lhs_steps,rhs = dest_imp g in + let nterm = rand(rator(rator(lhs_steps))) in + let snextname = "s" ^ (string_of_int (k+1)) in + let snext = mk_var(snextname, `:armstate`) in + let pcname = find_pc_varname asl ("s" ^ (string_of_int k)) in + + ((if is_numeral nterm then + let nminus1 = (dest_numeral nterm) -/ (num 1) in + GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV) [ + ARITH_RULE + (mk_eq (nterm, mk_binary "+" (`1`, mk_numeral (nminus1))))] + else ALL_TAC) THEN + GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV) [STEPS_STEP] THEN + (* deal with imp lhs: `?s''. arm sk s'' /\ steps arm (n-1) s'' s'` *) + DISCH_THEN (X_CHOOSE_THEN snext + (fun th -> let th_arm, th_steps = CONJ_PAIR th in + LABEL_TAC "HARM" th_arm THEN + MP_TAC th_steps)) THEN + EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC "HARM" exec_decode_th exec_decode_len THEN + UPDATE_PC_TAC pcname snextname (mk_numeral (num next_pc_ofs)) THEN + DISCARD_OLDSTATE_TAC snextname + ) + (asl,g);; + +(* Prove: + eventually arm (\s'. read PC s' = word (pc' + 1160) /\ P s0 s') s0 + ==> eventually_n arm n (\s'. read PC s' = word (pc' + 1160) /\ P s0 s') s0 *) +let PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC execth = + let mc_length_th = fst (CONJ_PAIR execth) in + let n = Num.int_of_num (dest_numeral (snd (dest_eq (concl mc_length_th)))) / 4 in + let _ = Printf.printf "PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC: n: %d..\n" n in + DISCH_THEN (LABEL_TAC "HEVENTUALLY") THEN + REWRITE_TAC[eventually_n] THEN + CONJ_TAC THENL [ + (** SUBGOAL 1 **) + (* `!s'. steps arm .. s s' ==> P s s'` *) + X_GEN_TAC `s_final:armstate` THEN UNDISCH_LABEL_TAC "HEVENTUALLY" THEN + REPEAT_I_N 0 n + (fun i -> EVENTUALLY_TAKE_STEP_RIGHT_FORALL_TAC + execth `s0:armstate` 0 i n THEN + DISCARD_OLDSTATE_TAC ("s" ^ (if i = (n-1) then "_final" else string_of_int (i+1)))) THEN + (* match last step: utilize the barrier instruction *) + ONCE_REWRITE_TAC[eventually_CASES] THEN + ASM_REWRITE_TAC[] THEN + STRIP_TAC THEN + (let stuck_th = (REWRITE_RULE[TAUT `(P/\Q/\R==>S) <=> (P==>Q==>R==>S)`] + ALIGNED_BYTES_LOADED_BARRIER_ARM_STUCK) in + FIRST_ASSUM (fun th -> + (* th is 'aligned_bytes_loaded s_final (word_add (word pc') (word 1828)) barrier_inst_bytes' *) + let th = REWRITE_RULE [WORD_RULE `word_add (word x) (word y):int64 = word (x+y)`] th in + let th2 = MATCH_MP stuck_th th in + ASM_MESON_TAC[th2])); + + (** SUBGOAL 2 **) + ONCE_REWRITE_TAC[ + ITAUT`(!x y. P y /\ Q x y ==> R x y) <=> (!y. P y ==> !x. Q x y ==> R x y)`] THEN + X_GEN_TAC `n0:num` THEN STRIP_TAC THEN GEN_TAC THEN + REPEAT_I_N 0 n (fun i -> + let n = mk_var("n" ^ (string_of_int i),`:num`) in + let np1 = mk_var("n" ^ (string_of_int (i+1)),`:num`) in + DISJ_CASES_THEN2 + SUBST1_TAC + (X_CHOOSE_THEN np1 + (fun th -> SUBST_ALL_TAC (REWRITE_RULE[ARITH_RULE`SUC x=1+x`] th))) + (SPEC n num_CASES) THENL [ + (** SUBGOAL 1 **) + SIMPLIFY_STEPS_0_TAC THEN + SOLVE_EXISTS_ARM_TAC (snd (CONJ_PAIR execth)); + + (** SUBGOAL 2 **) + EVENTUALLY_STEPS_EXISTS_STEP_TAC execth i (4*(i+1)) THEN + FIRST_X_ASSUM (fun th -> + let res = MATCH_MP (ARITH_RULE`!x. 1+x x<(n-1)`) th in + ASSUME_TAC (CONV_RULE (ONCE_DEPTH_CONV NUM_REDUCE_CONV) res)) + ]) THEN + ASM_ARITH_TAC (* last is: 'n < 0' *) + ];; + + +(* ------------------------------------------------------------------------- *) +(* Functions that create statements that are related to program equivalence. *) +(* ------------------------------------------------------------------------- *) + +(* pc_mc_ofs: relative PC offset of the machine code located + in memory. Look at eventually_n_at_pc *) +(* pc_ofs: relative PC offset of the execution entry point. + Also look at eventually_n_at_pc *) +let mk_eventually_n_at_pc_statement + (assum:term) (quants:term list) (pc_mc_ofs:int) (mc:thm) + (pc_ofs:int) (s0_pred:term) = + let mk_pc ofs = + if ofs = 0 then `pc:num` + else mk_binary "+" (`pc:num`,mk_small_numeral ofs) in + let mc_pc = mk_pc pc_mc_ofs in + let mc_barrier = mk_binop + `APPEND:((8)word)list->((8)word)list->((8)word)list` + (lhs(concl mc)) `barrier_inst_bytes` in + let pc = mk_pc pc_ofs in + let nbytes = get_bytelist_length(rhs(concl mc)) in + let pc_end = mk_pc (pc_ofs + nbytes) in + let nsteps = mk_small_numeral(nbytes / 4) in + let body = list_mk_comb + (`eventually_n_at_pc`,[mc_pc;mc_barrier;pc;pc_end;nsteps;s0_pred]) in + list_mk_forall (`pc:num`::quants, (mk_imp (assum,body)));; + + +let mk_equiv_statement (assum:term) (equiv_in:thm) (equiv_out:thm) + (mc1:thm) (pc_ofs1:int) (maychange1:term) + (mc2:thm) (pc_ofs2:int) (maychange2:term):term = + let _ = List.map2 type_check + [assum;maychange1;maychange2] + [`:bool`;`:armstate->armstate->bool`;`:armstate->armstate->bool`] in + let quants_in,equiv_in_body = strip_forall (concl equiv_in) in + let quants_out,equiv_out_body = strip_forall (concl equiv_out) in + (* equiv_in and equiv_out's first two universal quantifiers must be armstates. *) + let quants_in_states,quants_in = takedrop 2 quants_in in + let quants_out_states,quants_out = takedrop 2 quants_out in + let _ = List.map2 type_check + (quants_in_states @ quants_out_states) + [`:armstate`;`:armstate`;`:armstate`;`:armstate`] in + let quants = union quants_in quants_out in + let quants = [`pc:num`;`pc2:num`] @ quants in + (* Now build 'ensures2' *) + let mk_aligned_bytes_loaded (s:term) (pc_var:term) (pc_ofs:int) (mc:term) = + let _ = List.map2 type_check [s;pc_var;mc] [`:armstate`;`:num`;`:((8)word)list`] in + if pc_ofs = 0 then + let template = `aligned_bytes_loaded s (word pc) mc` in + subst [s,`s:armstate`;mc,`mc:((8)word)list`;pc_var,`pc:num`] template + else + let template = `aligned_bytes_loaded s (word (pc + pc_ofs)) mc` in + subst [s,`s:armstate`;mc,`mc:((8)word)list`; + mk_small_numeral(pc_ofs),`pc_ofs:num`;pc_var,`pc:num`] + template + in + let mk_read_pc (s:term) (pc_var:term) (pc_ofs:int):term = + let _ = List.map2 type_check [s;pc_var] [`:armstate`;`:num`] in + if pc_ofs = 0 then + let template = `read PC s = word pc` in + subst [s,`s:armstate`;pc_var,`pc:num`] template + else + let template = `read PC s = word (pc+pc_ofs)` in + subst [s,`s:armstate`;pc_var,`pc:num`;mk_small_numeral(pc_ofs),`pc_ofs:num`] + template + in + let s = `s:armstate` and s2 = `s2:armstate` in + let pc = `pc:num` and pc2 = `pc2:num` in + let mc1_length = get_bytelist_length (rhs (concl mc1)) in + let mc2_length = get_bytelist_length (rhs (concl mc2)) in + let equiv_in_pred = fst (strip_comb (fst (dest_eq equiv_in_body))) in + let equiv_out_pred = fst (strip_comb (fst (dest_eq equiv_out_body))) in + + let precond = mk_gabs (`(s:armstate,s2:armstate)`, + (list_mk_conj [ + mk_aligned_bytes_loaded s pc pc_ofs1 (lhs (concl mc1)); + mk_read_pc s pc pc_ofs1; + mk_aligned_bytes_loaded s2 pc2 pc_ofs2 (lhs (concl mc2)); + mk_read_pc s2 pc2 pc_ofs2; + list_mk_comb (equiv_in_pred, (`(s:armstate,s2:armstate)`::quants_in)) + ])) in + let postcond = mk_gabs (`(s:armstate,s2:armstate)`, + (list_mk_conj [ + mk_aligned_bytes_loaded s pc pc_ofs1 (lhs (concl mc1)); + mk_read_pc s pc (pc_ofs1 + mc1_length); + mk_aligned_bytes_loaded s2 pc2 pc_ofs2 (lhs (concl mc2)); + mk_read_pc s2 pc2 (pc_ofs2 + mc2_length); + list_mk_comb (equiv_out_pred, (`(s:armstate,s2:armstate)`::quants_out)) + ])) in + let maychange = list_mk_gabs ( + [`(s:armstate,s2:armstate)`;`(s':armstate,s2':armstate)`], + mk_conj + (list_mk_comb (maychange1,[`s:armstate`;`s':armstate`]), + list_mk_comb (maychange2,[`s2:armstate`;`s2':armstate`]))) in + let nsteps1 = mk_abs (`s:armstate`,mk_small_numeral(mc1_length / 4)) in + let nsteps2 = mk_abs (`s:armstate`,mk_small_numeral(mc2_length / 4)) in + + let _ = List.iter (fun t -> Printf.printf "\t%s\n" (string_of_term t)) + [precond;postcond;maychange;nsteps1;nsteps2] in + let ensures2_pred = list_mk_comb + (`ensures2: + (armstate->armstate->bool)->(armstate#armstate->bool) + ->(armstate#armstate->bool) + ->(armstate#armstate->armstate#armstate->bool) + ->(armstate->num)->(armstate->num)->bool`, + [`arm`;precond;postcond;maychange;nsteps1;nsteps2]) in + let imp = mk_imp (assum,ensures2_pred) in + list_mk_forall (quants, imp);; + +(* Given a goal `ensures ..` which is a spec of program p, generate a + verification condition from two lemmas: + 1. equiv_th: a program equivalence theorem between p and another program p2 + 2. correct_n_th: a specification of p2 in `ensures_n`. + execths is a list of *_EXEC theorems for p1 and p2. + The result of tactic is conjunction of three clauses. *) +let VCGEN_EQUIV_TAC equiv_th correct_n_th (execths:thm list) = + let stepfn = + let b = snd (strip_forall (concl equiv_th)) in + let b = if is_imp b then snd (dest_imp b) else b in + snd (dest_comb b) in + if type_of stepfn <> `:armstate->num` + then failwith ("Not a step function: " ^ (string_of_term stepfn)) else + MATCH_MP_TAC ENSURES_N_ENSURES THEN + EXISTS_TAC stepfn THEN + (* Prepare a 'conjunction' of SPEC and EQUIV *) + MP_TAC ( + let ensures_n_part = SPEC_ALL correct_n_th in + let equiv_part = SPEC_ALL equiv_th in + let conj_ensures_n_equiv = CONJ ensures_n_part equiv_part in + MATCH_MP (TAUT`((P==>Q)/\(R==>S)) ==> ((P/\R)==>(Q/\S))`) conj_ensures_n_equiv) THEN + + (* Prove the nonoverlapping assumptions here: + (ASSUM ==> ensures_n) ==> ensures_n *) + W (fun (asl,g) -> + if is_imp g then + let r = ([ALL;NONOVERLAPPING_CLAUSES] @ execths) in + SUBGOAL_THEN (fst (dest_imp (fst (dest_imp g)))) + (fun th -> REWRITE_TAC[th]) THENL [ + REWRITE_TAC r THEN RULE_ASSUM_TAC(REWRITE_RULE r) THEN + REPEAT SPLIT_FIRST_CONJ_ASSUM_TAC THEN + REPEAT CONJ_TAC THEN NONOVERLAPPING_TAC; + ALL_TAC + ] + else ALL_TAC) THEN + + (* Conjunction of ensures2 and ensures_n *) + DISCH_THEN (fun th -> LABEL_TAC "H" + (REWRITE_RULE[] (MATCH_MP ENSURES_N_ENSURES2_CONJ th))) THEN + (* .. and apply H as a precondition of ENSURES2_ENSURES_N *) + REMOVE_THEN "H" (fun th -> + let th2 = MATCH_MP + (REWRITE_RULE [TAUT `(P/\P2/\P3==>Q) <=> P==>P2==>P3==>Q`] ENSURES2_ENSURES_N) th in + MATCH_MP_TAC (REWRITE_RULE [TAUT`(P==>Q==>R) <=> (P/\Q==>R)`] th2)) THEN + REWRITE_TAC[];; \ No newline at end of file diff --git a/arm/proofs/instruction.ml b/arm/proofs/instruction.ml index 277ae939..73cf4b3e 100644 --- a/arm/proofs/instruction.ml +++ b/arm/proofs/instruction.ml @@ -1416,6 +1416,24 @@ let arm_UMULL_VEC = define let mlzx:(128)word = usimd8 (word_zx:(8)word->(16)word) ml in (Rd := simd8 word_mul nlzx mlzx) s`;; +let arm_UMULL2_VEC = define + `arm_UMULL2_VEC Rd Rn Rm esize = + \s. // Get the low halfs + let nl:(64)word = word_subword (read Rn s:(128)word) (64,64):(64)word in + let ml:(64)word = word_subword (read Rm s:(128)word) (64,64):(64)word in + if esize = 32 then + let nlzx:(128)word = usimd2 (word_zx:(32)word->(64)word) nl in + let mlzx:(128)word = usimd2 (word_zx:(32)word->(64)word) ml in + (Rd := simd2 word_mul nlzx mlzx) s + else if esize = 16 then + let nlzx:(128)word = usimd4 (word_zx:(16)word->(32)word) nl in + let mlzx:(128)word = usimd4 (word_zx:(16)word->(32)word) ml in + (Rd := simd4 word_mul nlzx mlzx) s + else // esize = 8 + let nlzx:(128)word = usimd8 (word_zx:(8)word->(16)word) nl in + let mlzx:(128)word = usimd8 (word_zx:(8)word->(16)word) ml in + (Rd := simd8 word_mul nlzx mlzx) s`;; + let arm_USHR_VEC = define `arm_USHR_VEC Rd Rn amt esize datasize = \s. let n = read Rn s in @@ -2217,6 +2235,7 @@ let arm_TRN2_ALT = EXPAND_SIMD_RULE arm_TRN2;; let arm_UADDLP_ALT = EXPAND_SIMD_RULE arm_UADDLP;; let arm_UMLAL_VEC_ALT = EXPAND_SIMD_RULE arm_UMLAL_VEC;; let arm_UMULL_VEC_ALT = EXPAND_SIMD_RULE arm_UMULL_VEC;; +let arm_UMULL2_VEC_ALT = EXPAND_SIMD_RULE arm_UMULL2_VEC;; let arm_USHR_VEC_ALT = EXPAND_SIMD_RULE arm_USHR_VEC;; let arm_USRA_VEC_ALT = EXPAND_SIMD_RULE arm_USRA_VEC;; let arm_UZP1_ALT = EXPAND_SIMD_RULE arm_UZP1;; @@ -2251,7 +2270,7 @@ let ARM_OPERATION_CLAUSES = arm_SLI_VEC_ALT; arm_SMULH; arm_SUB; arm_SUB_VEC_ALT; arm_SUBS_ALT; arm_TRN1_ALT; arm_TRN2_ALT; arm_UADDLP_ALT; arm_UBFM; arm_UMOV; arm_UMADDL; arm_UMLAL_VEC_ALT; - arm_UMSUBL; arm_UMULL_VEC_ALT; arm_UMULH; + arm_UMSUBL; arm_UMULL_VEC_ALT; arm_UMULL2_VEC_ALT; arm_UMULH; arm_USHR_VEC_ALT; arm_USRA_VEC_ALT; arm_UZP1_ALT; arm_UZP2_ALT; arm_XTN_ALT; diff --git a/arm/proofs/neon_helper.ml b/arm/proofs/neon_helper.ml index bef16952..9eb0b4a2 100644 --- a/arm/proofs/neon_helper.ml +++ b/arm/proofs/neon_helper.ml @@ -308,17 +308,97 @@ let WORD_MUL64_HI = prove(`!(x: (64)word) (y: (64)word). (* Helpful tactics *) (* ------------------------------------------------------------------------- *) -(* match terms of pattern `read (memory :> bytes64 _) ) = _`. *) -let is_read_memory_bytes64 t = +(* match terms of pattern `read (memory :> bytes64 addr) s = v` and + return (addr, s, v). *) +let decompose_read_memory_bytes64 t: (term * term * term) option = if is_eq t then begin match lhs t with | Comb(Comb ( Const ("read", _), Comb( Comb(Const (":>", _),Const("memory", _)), - Comb(Const ("bytes64", _),_))),_) -> true - | _ -> false end - else false;; + Comb(Const ("bytes64", _),addr))),st) -> + Some (addr, st, rhs t) + | _ -> None end + else None;; + +let _ = decompose_read_memory_bytes64 `read (memory :> bytes64 addr) s = v`;; + +let option_get (x:'a option) = + match x with + | Some x' -> x' + | None -> failwith "option_get";; + +let are_consecutive_read_byte64s t1 t2 = + let addr1,_,_ = option_get (decompose_read_memory_bytes64 t1) in + let addr2,_,_ = option_get (decompose_read_memory_bytes64 t2) in + let eq = subst [addr1,`addr1:int64`; addr2,`addr2:int64`] + `word_add addr1 (word 8):int64 = addr2` in + can WORD_RULE eq;; + +let _ = are_consecutive_read_byte64s + `read (memory :> bytes64 (word_add addr (word 8))) s = v` + `read (memory :> bytes64 (word_add addr (word 16))) s = v2`;; + +(* Combine `read (memory :> bytes64 addr1) s = v1` + and `read (memory :> bytes64 addr2) s = v2` into + `read (memory :> bytes128 addr1) s = word_join v2 v1` if addr2 = addr1 + 8 *) +let COMBINE_READ_BYTES64_PAIRS_TAC = + (* Is (t1's read memory offset + 8) = t2's read memory offset? *) + let rec fn (asms:(thm*(term*term*term)) list) (idx:int): tactic = + if idx >= List.length asms then ALL_TAC else + let asm,(addr1,s,v1) = List.nth asms idx in + (*Printf.printf "-- idx %d: asm: %s; addr1: %s, s: %s, v1: %s\n" + idx + (string_of_thm asm) + (string_of_term addr1) + (string_of_term s) + (string_of_term v1);*) + let tac = List.map + (fun (asm2,(addr2,s2,v2)) -> + (* Are asm and asm2 consecutive memory reads? *) + let eq = subst [addr1,`addr1:int64`; addr2,`addr2:int64`] + `word_add addr1 (word 8):int64 = addr2` in + (*let _ = Printf.printf "can: %s\n" (string_of_term eq) in*) + if can WORD_RULE eq && s = s2 then + ((*Printf.printf "yes! asm2: %s; addr2: %s, s: %s, v2: %s\n" + (string_of_thm asm) + (string_of_term addr2) + (string_of_term s) + (string_of_term v2);*) + let lhs128 = subst [addr1,`addr1:int64`;s,`s:armstate`] + `read (memory :> bytes128 addr1) s` in + let hilo = mk_comb (mk_comb + (`word_join:(64)word->(64)word->(128)word`,v2),v1) in + Some (SUBGOAL_THEN (mk_eq (lhs128, hilo)) ASSUME_TAC THENL [ + REWRITE_TAC[GSYM asm;GSYM asm2] THEN + REWRITE_TAC[READ_MEMORY_BYTESIZED_SPLIT; WORD_ADD_ASSOC_CONSTS] THEN + (ARITH_TAC ORELSE (PRINT_GOAL_TAC THEN NO_TAC)); + ALL_TAC + ])) + else None) + asms in + let tac = List.filter (fun x -> x <> None) tac in + (match tac with | (Some tac)::_ -> tac | _ -> ALL_TAC) + THEN (fn asms (idx+1)) + in + W (fun (asl,_) -> + let asms = List.filter_map + (fun (_,th) -> + match decompose_read_memory_bytes64 (concl th) with + | Some t -> Some (th, t) + | None -> None) asl in + (*Printf.printf "# found read(memory :> bytes64) asms: %d\n" (List.length asms);*) + fn asms 0);; + +(* An example of COMBINE_READ_BYTES64_PAIRS_TAC *) +let _ = prove( + `read (memory :> bytes64 addr1) s = v1 /\ + read (memory :> bytes64 (word_add addr1 (word 8))) s = v2 + ==> read (memory :> bytes128 addr1) s = word_join v2 v1:int128`, + STRIP_TAC THEN + COMBINE_READ_BYTES64_PAIRS_TAC THEN + ASM_REWRITE_TAC[]);; let BYTES128_EQ_JOIN64_TAC lhs128 hi64 lo64 = let hilo = mk_comb (mk_comb @@ -327,7 +407,7 @@ let BYTES128_EQ_JOIN64_TAC lhs128 hi64 lo64 = EVERY_ASSUM (fun thm -> (*let t = is_read_memory_bytes64 (concl thm) in let _ = printf "%s \n" (string_of_term (concl thm)) t in*) - if is_read_memory_bytes64 (concl thm) + if decompose_read_memory_bytes64 (concl thm) <> None then REWRITE_TAC[GSYM thm] else ALL_TAC) THEN REWRITE_TAC[READ_MEMORY_BYTESIZED_SPLIT; WORD_ADD_ASSOC_CONSTS] THEN diff --git a/arm/proofs/simulator.ml b/arm/proofs/simulator.ml index 1b4d7232..8a157e33 100755 --- a/arm/proofs/simulator.ml +++ b/arm/proofs/simulator.ml @@ -283,6 +283,10 @@ let iclasses = "001011100x1xxxxx110000xxxxxxxxxx"; (* size!=11 *) "00101110101xxxxx110000xxxxxxxxxx"; (* size!=11 *) + (*** UMULL2 ***) + "011011100x1xxxxx110000xxxxxxxxxx"; (* size!=11 *) + "01101110101xxxxx110000xxxxxxxxxx"; (* size!=11 *) + (*** USHR (make sure immh is nonzero) ***) "0x10111101xxxxxx000001xxxxxxxxxx"; "0x101111001xxxxx000001xxxxxxxxxx"; diff --git a/benchmarks/benchmark.c b/benchmarks/benchmark.c index 47746cc9..97937a27 100644 --- a/benchmarks/benchmark.c +++ b/benchmarks/benchmark.c @@ -784,6 +784,8 @@ void call_bignum_kmul_16_32_neon(void) {} void call_bignum_ksqr_16_32_neon(void) {} void call_bignum_kmul_32_64_neon(void) {} void call_bignum_ksqr_32_64_neon(void) {} +void call_bignum_montmul_p256_neon(void) {} +void call_bignum_montsqr_p256_neon(void) {} void call_bignum_mul_8_16_neon(void) {} void call_bignum_sqr_8_16_neon(void) {} @@ -803,6 +805,8 @@ void call_bignum_kmul_16_32_neon(void) repeat(bignum_kmul_16_32_neon(b0,b1,b2,b3 void call_bignum_ksqr_16_32_neon(void) repeat(bignum_ksqr_16_32_neon(b0,b1,b2)) void call_bignum_kmul_32_64_neon(void) repeat(bignum_kmul_32_64_neon(b0,b1,b2,b3)) void call_bignum_ksqr_32_64_neon(void) repeat(bignum_ksqr_32_64_neon(b0,b1,b2)) +void call_bignum_montmul_p256_neon(void) repeat(bignum_montmul_p256_neon(b0,b1,b2)) +void call_bignum_montsqr_p256_neon(void) repeat(bignum_montsqr_p256_neon(b0,b1)) void call_bignum_mul_8_16_neon(void) repeat(bignum_mul_8_16_neon(b0,b1,b2)) void call_bignum_sqr_8_16_neon(void) repeat(bignum_sqr_8_16_neon(b0,b1)) @@ -1036,6 +1040,7 @@ int main(int argc, char *argv[]) timingtest(all,"bignum_montmul (32x32 -> 32)" ,call_bignum_montmul__32); timingtest(bmi,"bignum_montmul_p256",call_bignum_montmul_p256); timingtest(all,"bignum_montmul_p256_alt",call_bignum_montmul_p256_alt); + timingtest(arm,"bignum_montmul_p256_neon",call_bignum_montmul_p256_neon); timingtest(bmi,"bignum_montmul_p256k1",call_bignum_montmul_p256k1); timingtest(all,"bignum_montmul_p256k1_alt",call_bignum_montmul_p256k1_alt); timingtest(bmi,"bignum_montmul_p384",call_bignum_montmul_p384); @@ -1048,6 +1053,7 @@ int main(int argc, char *argv[]) timingtest(all,"bignum_montsqr (32 -> 32)" ,call_bignum_montsqr__32); timingtest(bmi,"bignum_montsqr_p256",call_bignum_montsqr_p256); timingtest(all,"bignum_montsqr_p256_alt",call_bignum_montsqr_p256_alt); + timingtest(arm,"bignum_montsqr_p256_neon",call_bignum_montsqr_p256_neon); timingtest(bmi,"bignum_montsqr_p256k1",call_bignum_montsqr_p256k1); timingtest(all,"bignum_montsqr_p256k1_alt",call_bignum_montsqr_p256k1_alt); timingtest(bmi,"bignum_montsqr_p384",call_bignum_montsqr_p384); diff --git a/common/misc.ml b/common/misc.ml index 95b88e30..88b50f5a 100644 --- a/common/misc.ml +++ b/common/misc.ml @@ -1527,10 +1527,26 @@ let HINT_EXISTS_REFL_TAC: tactic = let match_from_eq (t:term): term option = if not (is_eq t) then None else let eqlhs, eqrhs = dest_eq t in - if eqlhs = qvar_to_match && List.for_all (fun qvar -> not (vfree_in qvar eqrhs)) (List.tl qvars) then Some eqrhs - else if eqrhs = qvar_to_match && List.for_all (fun qvar -> not (vfree_in qvar eqlhs)) (List.tl qvars) then Some eqlhs + if eqlhs = qvar_to_match && List.for_all + (fun qvar -> not (vfree_in qvar eqrhs)) (List.tl qvars) + then Some eqrhs + else if eqrhs = qvar_to_match && List.for_all + (fun qvar -> not (vfree_in qvar eqlhs)) (List.tl qvars) + then Some eqlhs else None in let ll = List.map match_from_eq eqterms in match List.filter (fun t -> t <> None) ll with | [] -> failwith ("Cannot find a hint for " ^ (string_of_term qvar_to_match)) | (Some t)::_ -> EXISTS_TAC t (asl,g);; + +(* ------------------------------------------------------------------------- *) +(* A tiny checker function printing an informative diagnostic message *) +(* ------------------------------------------------------------------------- *) + +let type_check (t:term) (ty:hol_type): unit = + if type_of t <> ty then + failwith (Printf.sprintf "`%s` must have type `%s` but has `%s`" + (string_of_term t) (string_of_type ty) + (string_of_type (type_of t))) + else + ();; diff --git a/common/relational2.ml b/common/relational2.ml index e69a88b7..92609df2 100644 --- a/common/relational2.ml +++ b/common/relational2.ml @@ -170,24 +170,25 @@ let ensures_n = new_definition (****************************************************************************** We will describe why this definition was chosen by explaining problems of its alternative definitions. - First, a natural expansion of 'ensures' using the old eventually like this + First, a natural definition of 'ensures' using a product of steps like below is problematic: 'eventually (\(s1,s2) (s1',s2'). (step s1 s1' /\ s2 = s2') \/ (step s2 s2' /\ s1 = s1') P (sinit1,sinit2)' - because this has a 'starvation' problem. If s1's step is taken 'too much' - while s2 is fixed to make the predicate always false, we cannot prove - anything. - EVENTUALLY_PAIR_OF_STEP_DOESNT_WORK proves that with this definition we + Let's assume that the post-condition P must hold at (s1,s2) = (0,1). + Beginning from (0,0), if 'eventually' takes a step at s1 first: + (0,0) -> (1,0) + It cannot eventually reach at (0,1) because fst of (1,0) is already 1. + EVENTUALLY_PROD_OF_STEPS_BAD proves that with this definition we cannot reach to a state (s1,s2) = (0,1) even if we increment step by step from (s1,s2) = (0,0)! Note that bounding the number of steps that 'eventually' can take cannot be a solution, because for this example 2 steps is enough to make it false. *****************************************************************************) -let EVENTUALLY_PAIR_OF_STEP_DOESNT_WORK = +let EVENTUALLY_PROD_OF_STEPS_BAD = let lemma = prove( `!s. (0 < FST s /\ SND s = 0) ==> ~eventually @@ -229,7 +230,7 @@ let EVENTUALLY_PAIR_OF_STEP_DOESNT_WORK = However, this definition is also problematic because it cannot prove two natural properties of relational Hoare logic. - 1. It is not commutative (EVENTUALLY_CANNOT_SWAP). + 1. It is not commutative (EVENTUALLY_NESTED_DOES_NOT_COMMUTE). Two 'eventually's cannot be swapped, meaning that we cannot prove this natural property: { \s s'. pre s s' } { \s s'. pre s' s } @@ -251,7 +252,7 @@ let EVENTUALLY_PAIR_OF_STEP_DOESNT_WORK = (A) If s'' is sb, `eventually (\s'. P s' sb) sa` is false when sa -> sa1 (B) If s'' is sb', `eventually (\s'. P s' sb') sa` is false when sa -> sa2 - 2. It is not compositional (EVENTUALLY_CANNOT_COMPOSE). + 2. It is not compositional (EVENTUALLY_NESTED_DOES_NOT_COMPOSE). We cannot prove this: { \s s'. P s s' } { \s s'. Q s s' } { \s s'. P s' s } c1 c2 /\ c1' c2' =/=> c1;c1' c2;c2' @@ -263,7 +264,7 @@ let EVENTUALLY_PAIR_OF_STEP_DOESNT_WORK = - Q s s' := (s = sa1 \/ s = sa2) /\ s' = sb - R s s' := (s = sa1 /\ s' = sb) \/ (s = sa2 /\ s' = sb') ******************************************************************************) -let EVENTUALLY_CANNOT_SWAP = +let EVENTUALLY_NESTED_DOES_NOT_COMMUTE = let EVENTUALLY_STOP_TAC = ONCE_REWRITE_TAC[eventually_CASES] THEN BETA_TAC THEN DISJ1_TAC in let EVENTUALLY_NEXT_TAC = ONCE_REWRITE_TAC[eventually_CASES] THEN BETA_TAC THEN DISJ2_TAC in prove( @@ -325,7 +326,7 @@ let EVENTUALLY_CANNOT_SWAP = ] ]);; -let EVENTUALLY_CANNOT_COMPOSE = +let EVENTUALLY_NESTED_DOES_NOT_COMPOSE = let EVENTUALLY_STOP_TAC = ONCE_REWRITE_TAC[eventually_CASES] THEN BETA_TAC THEN DISJ1_TAC in let EVENTUALLY_NEXT_TAC = ONCE_REWRITE_TAC[eventually_CASES] THEN BETA_TAC THEN DISJ2_TAC in let ASM_SIMPLIFY_ALL_TAC = ASM_REWRITE_TAC[ @@ -478,6 +479,11 @@ let EVENTUALLY_N_STEP = ] ]);; +let EVENTUALLY_N_STEPS = + prove( + `!(step:S->S->bool) P (n:num) s. eventually_n step n P s ==> ?s'. steps step n s s'`, + MESON_TAC[eventually_n;STEPS_NOSTUCK]);; + let EVENTUALLY_N_MONO = prove( `!(step:S->S->bool) (P:S->bool) (Q:S->bool) n s. @@ -547,6 +553,32 @@ let EVENTUALLY_N_EVENTUALLY_STEPS = prove( EXISTS_TAC `s'':S` THEN ASM_MESON_TAC[] ]);; + +(****************************************************************************** + This shows that using nested eventually_n is equivalent to defining a + product of steps and using with explicitly giving the number of steps to + take on each program. +******************************************************************************) + +let NESTED_EVENTUALLY_N_IS_PROD_OF_STEPS = prove( + `!n1 n2 (s1:S) (s2:S) (step:S->S->bool) P. + eventually_n step n1 (\s1'. eventually_n step n2 (\s2'. P s1' s2') s2) s1 + <=> + ((!s1' s2'. steps step n1 s1 s1' /\ steps step n2 s2 s2' ==> P s1' s2') /\ + // There isn't a 'stuck' state at the end of a trace shorter than n + (!s1' n1'. (n1' < n1 /\ steps step n1' s1 s1') ==> ?s1''. step s1' s1'') /\ + (!s2' n2'. (n2' < n2 /\ steps step n2' s2 s2') ==> ?s2''. step s2' s2''))`, + + REPEAT GEN_TAC THEN + EQ_TAC THENL [ + DISCH_THEN (fun th -> + MP_TAC (CONJ th (MATCH_MP EVENTUALLY_N_SWAP th))) THEN + REWRITE_TAC [eventually_n] THEN STRIP_TAC THEN + ASM_MESON_TAC[]; + + REWRITE_TAC[eventually_n] THEN MESON_TAC[] + ]);; + (* ------------------------------------------------------------------------- *) (* Properties of ensures_n, ensures2_n. *) (* ------------------------------------------------------------------------- *) @@ -646,11 +678,101 @@ let ENSURES2_TRANS = prove( ASM_REWRITE_TAC[]);; let ENSURES2_FRAME_SUBSUMED = prove( - `!P Q C C' fn1 fn2. - C subsumed C' ==> ensures2 arm P Q C fn1 fn2 ==> ensures2 arm P Q C' fn1 fn2`, + `!(step:S->S->bool) P Q C C' fn1 fn2. + C subsumed C' ==> ensures2 step P Q C fn1 fn2 ==> ensures2 step P Q C' fn1 fn2`, REWRITE_TAC[subsumed;ensures2;eventually_n] THEN MESON_TAC[]);; +let ENSURES2_WEAKEN = prove( + `!(step:S->S->bool) P Q P' Q' C C' fn1 fn2. + (!s s'. P'(s,s') ==> P(s,s')) /\ + (!s s'. Q(s,s') ==> Q'(s,s')) /\ + C subsumed C' + ==> ensures2 step P Q C fn1 fn2 + ==> ensures2 step P' Q' C' fn1 fn2`, + REPEAT STRIP_TAC THEN + FIRST_X_ASSUM (fun th1 -> FIRST_X_ASSUM (fun th2 -> + MP_TAC (MATCH_MP (MATCH_MP ENSURES2_FRAME_SUBSUMED th1) th2))) THEN + REWRITE_TAC[ensures2] THEN + REPEAT STRIP_TAC THEN + SUBGOAL_THEN `eventually_n (step:S->S->bool) (fn1 s1) + (\s1'. eventually_n step (fn2 s2) + (\s2'. Q (s1',s2') /\ C' (s1,s2) (s1',s2')) + s2) + s1` MP_TAC THENL [ + ASM_MESON_TAC[]; ALL_TAC + ] THEN + MATCH_MP_TAC EVENTUALLY_N_MONO THEN REWRITE_TAC[] THEN GEN_TAC THEN + MATCH_MP_TAC EVENTUALLY_N_MONO THEN REWRITE_TAC[] THEN GEN_TAC THEN + ASM_MESON_TAC[]);; + +let ENSURES2_CONJ = prove( + `!(step:S->S->bool) P Q P' Q' C fn1 fn2. + ensures2 step P Q C fn1 fn2 /\ + ensures2 step P' Q' C fn1 fn2 + ==> ensures2 step + (\(s,s'). P (s,s') /\ P' (s,s')) + (\(s,s'). Q (s,s') /\ Q' (s,s')) + C fn1 fn2`, + + REWRITE_TAC[ensures2;eventually_n] THEN MESON_TAC[]);; + +let EVENTUALLY_N_NESTED = prove( + `!(step:S->S->bool) (s0:S). + eventually_n step n (\s. eventually_n step n (\s2. P s s2) s0) s0 ==> + eventually_n step n (\s. P s s) s0`, + REWRITE_TAC[eventually_n] THEN + REPEAT STRIP_TAC THEN + ASM_MESON_TAC[]);; + +let ENSURES2_CONJ2 = prove( + `!(step:S->S->bool) P Q P' Q' C1 C2 C3 fn1 fn2 fn3. + ensures2 step P Q + (\(s,s2) (s',s2'). C1 s s' /\ C2 s2 s2') + fn1 fn2 /\ + ensures2 step P' Q' + (\(s,s2) (s',s2'). C2 s s' /\ C3 s2 s2') + fn2 fn3 + ==> ensures2 step + (\(s,s'). ?s''. P (s,s'') /\ P' (s'',s')) + (\(s,s'). ?s'''. Q (s,s''') /\ Q' (s''',s')) + (\(s,s2) (s',s2'). C1 s s' /\ C3 s2 s2') + fn1 fn3`, + + REWRITE_TAC[ensures2] THEN + REPEAT GEN_TAC THEN + DISCH_THEN (fun th -> let a,b = CONJ_PAIR th in + REPEAT GEN_TAC THEN + STRIP_TAC THEN + FIRST_X_ASSUM (fun th -> LABEL_TAC "H1" (MATCH_MP a th)) THEN + FIRST_X_ASSUM (fun th -> LABEL_TAC "H2" (MATCH_MP b th))) THEN + REMOVE_THEN "H1" (fun th -> REMOVE_THEN "H2" (fun th2 -> + ASSUME_TAC (REWRITE_RULE [EVENTUALLY_N_P_INOUT] (CONJ th2 th)))) THEN + FIRST_X_ASSUM MP_TAC THEN MATCH_MP_TAC EVENTUALLY_N_MONO THEN + BETA_TAC THEN + REPEAT STRIP_TAC THEN + SUBGOAL_THEN `eventually_n (step:S->S->bool) (fn2 s'') + (\s2'. eventually_n step (fn2 s'') + (\s1''. eventually_n step (fn3 s2) + (\s2''. Q' (s1'',s2'') /\ C2 s'' s1'' /\ C3 s2 s2'' /\ + Q (s':S,s2') /\ + (C1:S->S->bool) s1 s' /\ + C2 s'' s2') + s2) + s'') + s''` MP_TAC THENL [ + FIRST_X_ASSUM MP_TAC THEN + MATCH_MP_TAC EVENTUALLY_N_MONO THEN REWRITE_TAC[] THEN GEN_TAC THEN + REWRITE_TAC[ONCE_REWRITE_RULE[CONJ_SYM] EVENTUALLY_N_P_INOUT] THEN + MATCH_MP_TAC EVENTUALLY_N_MONO THEN REWRITE_TAC[] THEN GEN_TAC THEN + MATCH_MP_TAC EVENTUALLY_N_MONO THEN MESON_TAC[]; + ALL_TAC + ] THEN + DISCH_THEN (fun th -> MP_TAC + (MATCH_MP EVENTUALLY_N_SWAP (MATCH_MP EVENTUALLY_N_NESTED th))) THEN + MATCH_MP_TAC EVENTUALLY_N_MONO THEN REWRITE_TAC[] THEN GEN_TAC THEN + DISCH_THEN (fun th -> MAP_EVERY MP_TAC [th;MATCH_MP EVENTUALLY_N_STEPS th]) THEN + MESON_TAC[eventually_n]);; (* A relational hoare triple version of ENSURES_INIT_TAC. *) let ENSURES2_INIT_TAC sname sname2 = diff --git a/include/s2n-bignum-c89.h b/include/s2n-bignum-c89.h index 8cf4ad25..bd248e27 100644 --- a/include/s2n-bignum-c89.h +++ b/include/s2n-bignum-c89.h @@ -534,6 +534,7 @@ extern void bignum_montmul (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, u /* Inputs x[4], y[4]; output z[4] */ extern void bignum_montmul_p256 (uint64_t z[4], uint64_t x[4], uint64_t y[4]); extern void bignum_montmul_p256_alt (uint64_t z[4], uint64_t x[4], uint64_t y[4]); +extern void bignum_montmul_p256_neon (uint64_t z[4], uint64_t x[4], uint64_t y[4]); /* Montgomery multiply, z := (x * y / 2^256) mod p_256k1 */ /* Inputs x[4], y[4]; output z[4] */ @@ -567,6 +568,7 @@ extern void bignum_montsqr (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); /* Input x[4]; output z[4] */ extern void bignum_montsqr_p256 (uint64_t z[4], uint64_t x[4]); extern void bignum_montsqr_p256_alt (uint64_t z[4], uint64_t x[4]); +extern void bignum_montsqr_p256_neon (uint64_t z[4], uint64_t x[4]); /* Montgomery square, z := (x^2 / 2^256) mod p_256k1 */ /* Input x[4]; output z[4] */ diff --git a/include/s2n-bignum.h b/include/s2n-bignum.h index 7d11db03..8da9d043 100644 --- a/include/s2n-bignum.h +++ b/include/s2n-bignum.h @@ -541,6 +541,7 @@ extern void bignum_montmul (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *y, u // Inputs x[4], y[4]; output z[4] extern void bignum_montmul_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t x[S2N_BIGNUM_STATIC 4], uint64_t y[S2N_BIGNUM_STATIC 4]); extern void bignum_montmul_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t x[S2N_BIGNUM_STATIC 4], uint64_t y[S2N_BIGNUM_STATIC 4]); +extern void bignum_montmul_p256_neon (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t x[S2N_BIGNUM_STATIC 4], uint64_t y[S2N_BIGNUM_STATIC 4]); // Montgomery multiply, z := (x * y / 2^256) mod p_256k1 // Inputs x[4], y[4]; output z[4] @@ -574,6 +575,7 @@ extern void bignum_montsqr (uint64_t k, uint64_t *z, uint64_t *x, uint64_t *m); // Input x[4]; output z[4] extern void bignum_montsqr_p256 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t x[S2N_BIGNUM_STATIC 4]); extern void bignum_montsqr_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t x[S2N_BIGNUM_STATIC 4]); +extern void bignum_montsqr_p256_neon (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t x[S2N_BIGNUM_STATIC 4]); // Montgomery square, z := (x^2 / 2^256) mod p_256k1 // Input x[4]; output z[4] diff --git a/tests/test.c b/tests/test.c index af881a0b..faea7372 100644 --- a/tests/test.c +++ b/tests/test.c @@ -5902,9 +5902,10 @@ int test_bignum_montmul(void) return 0; } -int test_bignum_montmul_p256(void) +int test_bignum_montmul_p256_specific( + const char *name, void (*f)(uint64_t *, uint64_t *, uint64_t *)) { uint64_t t; - printf("Testing bignum_montmul_p256 with %d cases\n",tests); + printf("Testing %s with %d cases\n",name,tests); int c; for (t = 0; t < tests; ++t) @@ -5912,7 +5913,7 @@ int test_bignum_montmul_p256(void) reference_mod(4,b0,b2,p_256); random_bignum(4,b2); reference_mod(4,b1,b2,p_256); - bignum_montmul_p256(b4,b0,b1); + f(b4,b0,b1); reference_dmontmul(4,b3,b0,b1,p_256,i_256,b5); c = reference_compare(4,b3,4,b4); @@ -5934,36 +5935,25 @@ int test_bignum_montmul_p256(void) return 0; } -int test_bignum_montmul_p256_alt(void) -{ uint64_t t; - printf("Testing bignum_montmul_p256_alt with %d cases\n",tests); +int test_bignum_montmul_p256(void) +{ return test_bignum_montmul_p256_specific("test_bignum_montmul", + bignum_montmul_p256); +} - int c; - for (t = 0; t < tests; ++t) - { random_bignum(4,b2); - reference_mod(4,b0,b2,p_256); - random_bignum(4,b2); - reference_mod(4,b1,b2,p_256); - bignum_montmul_p256_alt(b4,b0,b1); - reference_dmontmul(4,b3,b0,b1,p_256,i_256,b5); +int test_bignum_montmul_p256_alt(void) +{ return test_bignum_montmul_p256_specific("test_bignum_montmul_alt", + bignum_montmul_p256_alt); +} - c = reference_compare(4,b3,4,b4); - if (c != 0) - { printf("### Disparity: [size %4"PRIu64"] " - "2^-256 * ...0x%016"PRIx64" * ...%016"PRIx64" mod p_256 = " - "0x%016"PRIx64"...%016"PRIx64" not 0x%016"PRIx64"...%016"PRIx64"\n", - UINT64_C(4),b0[0],b1[0],b4[3],b4[0],b3[3],b3[0]); - return 1; - } - else if (VERBOSE) - { printf("OK: [size %4"PRIu64"] " - "2^-256 * ...0x%016"PRIx64" * ...%016"PRIx64" mod p_256 = " - "0x%016"PRIx64"...%016"PRIx64"\n", - UINT64_C(4),b0[0],b1[0],b4[3],b4[0]); - } - } - printf("All OK\n"); - return 0; +int test_bignum_montmul_p256_neon(void) +{ +#ifdef __x86_64__ + // Do not call the neon function to avoid a linking failure error. + return 1; +#else + return test_bignum_montmul_p256_specific("test_bignum_montmul_neon", + bignum_montmul_p256_neon); +#endif } int test_bignum_montmul_p256k1(void) @@ -6315,15 +6305,16 @@ int test_bignum_montsqr(void) return 0; } -int test_bignum_montsqr_p256(void) +int test_bignum_montsqr_p256_specific(const char *name, + void (*f)(uint64_t *z, uint64_t *x)) { uint64_t t; - printf("Testing bignum_montsqr_p256 with %d cases\n",tests); + printf("Testing %s with %d cases\n",name,tests); int c; for (t = 0; t < tests; ++t) { random_bignum(4,b2); reference_mod(4,b0,b2,p_256); - bignum_montsqr_p256(b4,b0); + f(b4,b0); reference_dmontmul(4,b3,b0,b0,p_256,i_256,b5); c = reference_compare(4,b3,4,b4); @@ -6345,34 +6336,25 @@ int test_bignum_montsqr_p256(void) return 0; } -int test_bignum_montsqr_p256_alt(void) -{ uint64_t t; - printf("Testing bignum_montsqr_p256_alt with %d cases\n",tests); +int test_bignum_montsqr_p256(void) +{ return test_bignum_montsqr_p256_specific("test_bignum_montsqr", + bignum_montsqr_p256); +} - int c; - for (t = 0; t < tests; ++t) - { random_bignum(4,b2); - reference_mod(4,b0,b2,p_256); - bignum_montsqr_p256_alt(b4,b0); - reference_dmontmul(4,b3,b0,b0,p_256,i_256,b5); +int test_bignum_montsqr_p256_alt(void) +{ return test_bignum_montsqr_p256_specific("test_bignum_montsqr_alt", + bignum_montsqr_p256_alt); +} - c = reference_compare(4,b3,4,b4); - if (c != 0) - { printf("### Disparity: [size %4"PRIu64"] " - "2^-256 * ...0x%016"PRIx64"^2 mod p_256 = " - "0x%016"PRIx64"...%016"PRIx64" not 0x%016"PRIx64"...%016"PRIx64"\n", - UINT64_C(4),b0[0],b4[3],b4[0],b3[3],b3[0]); - return 1; - } - else if (VERBOSE) - { printf("OK: [size %4"PRIu64"] " - "2^-256 * ...0x%016"PRIx64"^2 mod p_256 = " - "0x%016"PRIx64"...%016"PRIx64"\n", - UINT64_C(4),b0[0],b4[3],b4[0]); - } - } - printf("All OK\n"); - return 0; +int test_bignum_montsqr_p256_neon(void) +{ +#ifdef __x86_64__ + // Do not call the neon function to avoid a linking failure error. + return 1; +#else + return test_bignum_montsqr_p256_specific("test_bignum_montsqr_neon", + bignum_montsqr_p256_neon); +#endif } int test_bignum_montsqr_p256k1(void) @@ -12172,6 +12154,8 @@ int main(int argc, char *argv[]) functionaltest(all,"bignum_kmul_32_64_neon", test_bignum_kmul_32_64_neon); functionaltest(all,"bignum_ksqr_16_32_neon",test_bignum_ksqr_16_32_neon); functionaltest(all,"bignum_ksqr_32_64_neon",test_bignum_ksqr_32_64_neon); + functionaltest(all,"bignum_montmul_p256_neon", test_bignum_montmul_p256_neon); + functionaltest(all,"bignum_montsqr_p256_neon", test_bignum_montsqr_p256_neon); functionaltest(all,"bignum_mul_8_16_neon",test_bignum_mul_8_16_neon); functionaltest(all,"bignum_sqr_8_16_neon",test_bignum_sqr_8_16_neon); } diff --git a/tools/external/slothy.sh b/tools/external/slothy.sh new file mode 100755 index 00000000..dd8352d2 --- /dev/null +++ b/tools/external/slothy.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +if [ "$#" -ne 2 ]; then + echo "slothy.sh " + exit 1 +fi + +SLOTHY_DIR=~/slothy + +set -e + +if [ ! -d "$SLOTHY_DIR" ]; then + echo "slothy does not exist at $SLOTHY_DIR . Please adjust the path by " + echo "modifying SLOTHY_DIR in this script (slothy.sh)." + exit 1 +fi + +source ${SLOTHY_DIR}/venv/bin/activate + +SRC_FILE=${1} +OUT_DIR=${2} +LOG_DIR=slothy-logs + +echo "SRC_FILE: ${SRC_FILE}" +echo "OUT_DIR: ${OUT_DIR}" +echo "LOG_DIR: ${LOG_DIR}" + +# Please properly adjust these parameters according to the description +# in the assembly file of s2n-bignum of interest + +if [ "$OUTPUTS" == "" ]; then + echo "Please set environment variable OUTPUTS as specified in the assembly" + echo "file of interest" + exit 1 +elif [ "$RESERVED_REGS" == "" ]; then + echo "Please set environment variable OUTPUTS as specified in the assembly" + echo "file of interest" + exit 1 +fi + +echo "OUTPUTS: ${OUTPUTS}" +echo "RESERVED_REGS: ${RESERVED_REGS}" + +REDIRECT_OUTPUT="--log --logdir=${LOG_DIR}" +mkdir -p $LOG_DIR +mkdir -p $OUT_DIR + +# "Rule of thumb is that you should pick the split factor so that the window size is somewhere around 100" +# Note that this will return a bloated number if the file contains many comments. +N_LINES=`wc -l ${SRC_FILE} | cut -d" " -f1` +echo "N_LINES=$N_LINES" +SPLIT_FACTOR=`python -c "print($N_LINES // 50)"` +echo "SPLIT_FACTOR=$SPLIT_FACTOR" + +UARCH=Arm_Neoverse_N1_experimental + +echo "** Step 1: Preprocessing" +${SLOTHY_DIR}/slothy-cli Arm_AArch64 $UARCH \ + ${SRC_FILE} \ + -o ${OUT_DIR}/1.preprocess.s \ + -c outputs=$OUTPUTS -c reserved_regs=$RESERVED_REGS \ + -c split_heuristic -c split_heuristic_repeat=0 \ + -c split_heuristic_preprocess_naive_interleaving \ + -c /visualize_reordering \ + $REDIRECT_OUTPUT + +echo "** Steps 2: Stepwise optimization, ignoring latencies" +# The goal here is to get a good amount of interleaving +# The best order of subregions is still TBD, but presently we 'comb' all stalls +# towards the middle of the code and repeat the process. The idea/hope is that +# by doing this multiple times, the stalls will eventually be absorbed. + +# When invoking slothy-cli, unuse -c variable_size by default because the +# timeout option is applied to its one large solver invocation. Without +# the option, slothy invokes a solver multiple times, each of which will take +# a shorter time & likely to fit in the given timeout (600s). A pitfall is that +# summation of the solver time without this option is very likely to be longer +# than the case when -c variable_size is enabled. Please feel free to add +# -c variable_time if you are familiar with the option & have a number +# that is confident enough to not cause timeout. + +${SLOTHY_DIR}/slothy-cli Arm_AArch64 $UARCH \ + ${OUT_DIR}/1.preprocess.s \ + -o ${OUT_DIR}/2.interleave.s \ + -c outputs=$OUTPUTS -c reserved_regs=$RESERVED_REGS \ + -c max_solutions=512 \ + -c timeout=600 \ + -c split_heuristic \ + -c objective_precision=0.05 \ + -c split_heuristic_factor=$SPLIT_FACTOR \ + -c constraints.model_latencies=False \ + -c /visualize_reordering \ + -c split_heuristic_repeat=10 \ + $REDIRECT_OUTPUT + +# Finally, also consider latencies + +SPLIT_FACTOR=$((N_LINES / 30)) + +echo "** Step 3: Consider latencies" +${SLOTHY_DIR}/slothy-cli Arm_AArch64 $UARCH \ + ${OUT_DIR}/2.interleave.s \ + -o ${OUT_DIR}/3.opt.s \ + -c outputs=$OUTPUTS -c reserved_regs=$RESERVED_REGS \ + -c max_solutions=512 \ + -c timeout=600 \ + -c split_heuristic \ + -c objective_precision=0.05 \ + -c split_heuristic_factor=$SPLIT_FACTOR \ + -c split_heuristic_repeat=10 \ + -c /visualize_reordering \ + $REDIRECT_OUTPUT \ No newline at end of file