diff --git a/arm/Makefile b/arm/Makefile index 5dcbd245e..281128e81 100644 --- a/arm/Makefile +++ b/arm/Makefile @@ -279,8 +279,10 @@ BIGNUM_OBJ = curve25519/bignum_add_p25519.o \ p384/bignum_mod_p384_6.o \ p384/bignum_montmul_p384.o \ p384/bignum_montmul_p384_alt.o \ + p384/bignum_montmul_p384_neon.o \ p384/bignum_montsqr_p384.o \ p384/bignum_montsqr_p384_alt.o \ + p384/bignum_montsqr_p384_neon.o \ p384/bignum_mux_6.o \ p384/bignum_neg_p384.o \ p384/bignum_nonzero_6.o \ diff --git a/arm/allowed_asm b/arm/allowed_asm index 85ca3e4a7..0f053a074 100644 --- a/arm/allowed_asm +++ b/arm/allowed_asm @@ -15,6 +15,7 @@ : csel$ : cset$ : csetm$ +: dup$ : eor$ : extr$ : ldp$ @@ -38,6 +39,7 @@ : sbc$ : sbcs$ : shl$ +: shrn$ : stp$ : str$ : strb$ diff --git a/arm/p384/Makefile b/arm/p384/Makefile index f5fc2aa1a..60687fb7c 100644 --- a/arm/p384/Makefile +++ b/arm/p384/Makefile @@ -35,8 +35,10 @@ OBJ = bignum_add_p384.o \ bignum_mod_p384_6.o \ bignum_montmul_p384.o \ bignum_montmul_p384_alt.o \ + bignum_montmul_p384_neon.o \ bignum_montsqr_p384.o \ bignum_montsqr_p384_alt.o \ + bignum_montsqr_p384_neon.o \ bignum_mux_6.o \ bignum_neg_p384.o \ bignum_nonzero_6.o \ diff --git a/arm/p384/bignum_montmul_p384_neon.S b/arm/p384/bignum_montmul_p384_neon.S new file mode 100644 index 000000000..08c296bc0 --- /dev/null +++ b/arm/p384/bignum_montmul_p384_neon.S @@ -0,0 +1,885 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery multiply, z := (x * y / 2^384) mod p_384 +// Inputs x[6], y[6]; output z[6] +// +// extern void bignum_montmul_p384_neon +// (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); +// +// Does z := (2^{-384} * x * y) mod p_384, assuming that the inputs x and y +// satisfy x * y <= 2^384 * p_384 (in particular this is true if we are in +// the "usual" case x < p_384 and y < p_384). +// +// Standard ARM ABI: X0 = z, X1 = x, X2 = y +// ---------------------------------------------------------------------------- + +// bignum_montmul_p384_neon is functionally equivalent to bignum_montmul_p384. +// It is written in a way that +// 1. A subset of scalar multiplications in bignum_montmul_p384 are carefully +// chosen and vectorized +// 2. The vectorized assembly is rescheduled using the SLOTHY superoptimizer. +// https://github.com/slothy-optimizer/slothy +// +// The output program of step 1. is as follows: +// +// stp x19, x20, [sp, #-16]! +// stp x21, x22, [sp, #-16]! +// stp x23, x24, [sp, #-16]! +// ldp x3, x21, [x1] +// ldr q30, [x1] +// ldp x8, x24, [x1, #16] +// ldp x5, x10, [x1, #32] +// ldp x13, x23, [x2] +// ldr q19, [x2] +// ldp x6, x14, [x2, #16] +// ldp x15, x17, [x2, #32] +// ldr q1, [x1, #32] +// ldr q28, [x2, #32] +// uzp1 v5.4S, v19.4S, v30.4S +// rev64 v19.4S, v19.4S +// uzp1 v0.4S, v30.4S, v30.4S +// mul v21.4S, v19.4S, v30.4S +// uaddlp v19.2D, v21.4S +// shl v19.2D, v19.2D, #32 +// umlal v19.2D, v0.2S, v5.2S +// mov x12, v19.d[0] +// mov x16, v19.d[1] +// mul x20, x8, x6 +// umulh x4, x3, x13 +// umulh x1, x21, x23 +// umulh x2, x8, x6 +// adds x4, x4, x16 +// adcs x19, x1, x20 +// adc x20, x2, xzr +// adds x11, x4, x12 +// adcs x16, x19, x4 +// adcs x1, x20, x19 +// adc x2, x20, xzr +// adds x7, x16, x12 +// adcs x4, x1, x4 +// adcs x9, x2, x19 +// adc x19, x20, xzr +// subs x2, x3, x21 +// cneg x20, x2, cc +// csetm x16, cc +// subs x2, x23, x13 +// cneg x2, x2, cc +// mul x1, x20, x2 +// umulh x2, x20, x2 +// cinv x16, x16, cc +// eor x1, x1, x16 +// eor x2, x2, x16 +// cmn x16, #0x1 +// adcs x11, x11, x1 +// adcs x7, x7, x2 +// adcs x4, x4, x16 +// adcs x9, x9, x16 +// adc x19, x19, x16 +// subs x2, x3, x8 +// cneg x20, x2, cc +// csetm x16, cc +// subs x2, x6, x13 +// cneg x2, x2, cc +// mul x1, x20, x2 +// umulh x2, x20, x2 +// cinv x16, x16, cc +// eor x1, x1, x16 +// eor x2, x2, x16 +// cmn x16, #0x1 +// adcs x7, x7, x1 +// adcs x4, x4, x2 +// adcs x9, x9, x16 +// adc x19, x19, x16 +// subs x2, x21, x8 +// cneg x20, x2, cc +// csetm x16, cc +// subs x2, x6, x23 +// cneg x2, x2, cc +// mul x1, x20, x2 +// umulh x2, x20, x2 +// cinv x16, x16, cc +// eor x1, x1, x16 +// eor x2, x2, x16 +// cmn x16, #0x1 +// adcs x4, x4, x1 +// adcs x20, x9, x2 +// adc x16, x19, x16 +// lsl x2, x12, #32 +// add x19, x2, x12 +// lsr x2, x19, #32 +// subs x1, x2, x19 +// sbc x2, x19, xzr +// extr x1, x2, x1, #32 +// lsr x2, x2, #32 +// adds x12, x2, x19 +// adc x2, xzr, xzr +// subs x1, x11, x1 +// sbcs x7, x7, x12 +// sbcs x4, x4, x2 +// sbcs x20, x20, xzr +// sbcs x16, x16, xzr +// sbc x9, x19, xzr +// lsl x2, x1, #32 +// add x19, x2, x1 +// lsr x2, x19, #32 +// subs x1, x2, x19 +// sbc x2, x19, xzr +// extr x1, x2, x1, #32 +// lsr x2, x2, #32 +// adds x12, x2, x19 +// adc x2, xzr, xzr +// subs x1, x7, x1 +// sbcs x4, x4, x12 +// sbcs x20, x20, x2 +// sbcs x16, x16, xzr +// sbcs x7, x9, xzr +// sbc x9, x19, xzr +// lsl x2, x1, #32 +// add x19, x2, x1 +// lsr x2, x19, #32 +// subs x1, x2, x19 +// sbc x2, x19, xzr +// extr x12, x2, x1, #32 +// lsr x2, x2, #32 +// adds x1, x2, x19 +// adc x2, xzr, xzr +// subs x4, x4, x12 +// sbcs x20, x20, x1 +// sbcs x16, x16, x2 +// sbcs x12, x7, xzr +// sbcs x1, x9, xzr +// sbc x2, x19, xzr +// stp x4, x20, [x0] // @slothy:writes=buffer0 +// stp x16, x12, [x0, #16] // @slothy:writes=buffer16 +// stp x1, x2, [x0, #32] // @slothy:writes=buffer32 +// mul x22, x24, x14 +// movi v31.2D, #0x00000000ffffffff +// uzp2 v16.4S, v28.4S, v28.4S +// xtn v6.2S, v1.2D +// xtn v30.2S, v28.2D +// rev64 v28.4S, v28.4S +// umull v5.2D, v6.2S, v30.2S +// umull v0.2D, v6.2S, v16.2S +// uzp2 v19.4S, v1.4S, v1.4S +// mul v20.4S, v28.4S, v1.4S +// usra v0.2D, v5.2D, #32 +// umull v1.2D, v19.2S, v16.2S +// uaddlp v24.2D, v20.4S +// and v5.16B, v0.16B, v31.16B +// umlal v5.2D, v19.2S, v30.2S +// shl v19.2D, v24.2D, #32 +// usra v1.2D, v0.2D, #32 +// umlal v19.2D, v6.2S, v30.2S +// usra v1.2D, v5.2D, #32 +// mov x20, v19.d[0] +// mov x16, v19.d[1] +// umulh x12, x24, x14 +// mov x1, v1.d[0] +// mov x2, v1.d[1] +// adds x4, x12, x20 +// adcs x20, x1, x16 +// adc x16, x2, xzr +// adds x7, x4, x22 +// adcs x12, x20, x4 +// adcs x1, x16, x20 +// adc x2, x16, xzr +// adds x9, x12, x22 +// adcs x19, x1, x4 +// adcs x4, x2, x20 +// adc x20, x16, xzr +// subs x2, x24, x5 +// cneg x16, x2, cc +// csetm x12, cc +// subs x2, x15, x14 +// cneg x2, x2, cc +// mul x1, x16, x2 +// umulh x2, x16, x2 +// cinv x12, x12, cc +// eor x1, x1, x12 +// eor x2, x2, x12 +// cmn x12, #0x1 +// adcs x11, x7, x1 +// adcs x9, x9, x2 +// adcs x19, x19, x12 +// adcs x4, x4, x12 +// adc x20, x20, x12 +// subs x2, x24, x10 +// cneg x16, x2, cc +// csetm x12, cc +// subs x2, x17, x14 +// cneg x2, x2, cc +// mul x1, x16, x2 +// umulh x2, x16, x2 +// cinv x12, x12, cc +// eor x1, x1, x12 +// eor x2, x2, x12 +// cmn x12, #0x1 +// adcs x7, x9, x1 +// adcs x19, x19, x2 +// adcs x4, x4, x12 +// adc x20, x20, x12 +// subs x2, x5, x10 +// cneg x16, x2, cc +// csetm x12, cc +// subs x2, x17, x15 +// cneg x2, x2, cc +// mul x1, x16, x2 +// umulh x2, x16, x2 +// cinv x16, x12, cc +// eor x1, x1, x16 +// eor x2, x2, x16 +// cmn x16, #0x1 +// adcs x19, x19, x1 +// adcs x12, x4, x2 +// adc x1, x20, x16 +// subs x2, x24, x3 +// sbcs x24, x5, x21 +// sbcs x21, x10, x8 +// ngc x5, xzr +// cmn x5, #0x1 +// eor x2, x2, x5 +// adcs x4, x2, xzr +// eor x2, x24, x5 +// adcs x20, x2, xzr +// eor x2, x21, x5 +// adc x16, x2, xzr +// subs x2, x13, x14 +// sbcs x24, x23, x15 +// sbcs x8, x6, x17 +// ngc x21, xzr +// cmn x21, #0x1 +// eor x2, x2, x21 +// adcs x15, x2, xzr +// eor x2, x24, x21 +// adcs x14, x2, xzr +// eor x2, x8, x21 +// adc x6, x2, xzr +// eor x9, x5, x21 +// ldp x21, x2, [x0] // @slothy:reads=buffer0 +// adds x10, x22, x21 +// adcs x5, x11, x2 +// ldp x21, x2, [x0, #16] // @slothy:reads=buffer16 +// adcs x24, x7, x21 +// adcs x8, x19, x2 +// ldp x21, x2, [x0, #32] // @slothy:reads=buffer32 +// adcs x21, x12, x21 +// adcs x2, x1, x2 +// adc x19, xzr, xzr +// stp x10, x5, [x0] // @slothy:writes=buffer0 +// stp x24, x8, [x0, #16] // @slothy:writes=buffer16 +// stp x21, x2, [x0, #32] // @slothy:writes=buffer32 +// mul x12, x4, x15 +// mul x5, x20, x14 +// mul x24, x16, x6 +// umulh x8, x4, x15 +// umulh x21, x20, x14 +// umulh x2, x16, x6 +// adds x10, x8, x5 +// adcs x5, x21, x24 +// adc x24, x2, xzr +// adds x23, x10, x12 +// adcs x8, x5, x10 +// adcs x21, x24, x5 +// adc x2, x24, xzr +// adds x13, x8, x12 +// adcs x1, x21, x10 +// adcs x10, x2, x5 +// adc x5, x24, xzr +// subs x2, x4, x20 +// cneg x24, x2, cc +// csetm x8, cc +// subs x2, x14, x15 +// cneg x2, x2, cc +// mul x21, x24, x2 +// umulh x2, x24, x2 +// cinv x8, x8, cc +// eor x21, x21, x8 +// eor x2, x2, x8 +// cmn x8, #0x1 +// adcs x23, x23, x21 +// adcs x13, x13, x2 +// adcs x1, x1, x8 +// adcs x10, x10, x8 +// adc x5, x5, x8 +// subs x2, x4, x16 +// cneg x24, x2, cc +// csetm x8, cc +// subs x2, x6, x15 +// cneg x2, x2, cc +// mul x21, x24, x2 +// umulh x2, x24, x2 +// cinv x8, x8, cc +// eor x21, x21, x8 +// eor x2, x2, x8 +// cmn x8, #0x1 +// adcs x4, x13, x21 +// adcs x13, x1, x2 +// adcs x1, x10, x8 +// adc x10, x5, x8 +// subs x2, x20, x16 +// cneg x24, x2, cc +// csetm x8, cc +// subs x2, x6, x14 +// cneg x2, x2, cc +// mul x21, x24, x2 +// umulh x2, x24, x2 +// cinv x5, x8, cc +// eor x21, x21, x5 +// eor x2, x2, x5 +// cmn x5, #0x1 +// adcs x24, x13, x21 +// adcs x8, x1, x2 +// adc x21, x10, x5 +// ldp x20, x16, [x0] // @slothy:reads=buffer0 +// ldp x17, x15, [x0, #16] // @slothy:reads=buffer16 +// ldp x14, x6, [x0, #32] // @slothy:reads=buffer32 +// cmn x9, #0x1 +// eor x2, x12, x9 +// adcs x12, x2, x20 +// eor x2, x23, x9 +// adcs x23, x2, x16 +// eor x2, x4, x9 +// adcs x13, x2, x17 +// eor x2, x24, x9 +// adcs x10, x2, x15 +// eor x2, x8, x9 +// adcs x5, x2, x14 +// eor x2, x21, x9 +// adcs x24, x2, x6 +// adcs x1, x9, x19 +// adcs x8, x9, xzr +// adcs x21, x9, xzr +// adc x2, x9, xzr +// adds x10, x10, x20 +// adcs x5, x5, x16 +// adcs x24, x24, x17 +// adcs x17, x1, x15 +// adcs x15, x8, x14 +// adcs x14, x21, x6 +// adc x6, x2, x19 +// lsl x2, x12, #32 +// add x1, x2, x12 +// lsr x2, x1, #32 +// subs x21, x2, x1 +// sbc x2, x1, xzr +// extr x21, x2, x21, #32 +// lsr x2, x2, #32 +// adds x8, x2, x1 +// adc x2, xzr, xzr +// subs x21, x23, x21 +// sbcs x23, x13, x8 +// sbcs x10, x10, x2 +// sbcs x5, x5, xzr +// sbcs x24, x24, xzr +// sbc x13, x1, xzr +// lsl x2, x21, #32 +// add x1, x2, x21 +// lsr x2, x1, #32 +// subs x21, x2, x1 +// sbc x2, x1, xzr +// extr x21, x2, x21, #32 +// lsr x2, x2, #32 +// adds x8, x2, x1 +// adc x2, xzr, xzr +// subs x21, x23, x21 +// sbcs x10, x10, x8 +// sbcs x5, x5, x2 +// sbcs x24, x24, xzr +// sbcs x23, x13, xzr +// sbc x13, x1, xzr +// lsl x2, x21, #32 +// add x1, x2, x21 +// lsr x2, x1, #32 +// subs x21, x2, x1 +// sbc x2, x1, xzr +// extr x8, x2, x21, #32 +// lsr x2, x2, #32 +// adds x21, x2, x1 +// adc x2, xzr, xzr +// subs x10, x10, x8 +// sbcs x5, x5, x21 +// sbcs x24, x24, x2 +// sbcs x8, x23, xzr +// sbcs x21, x13, xzr +// sbc x2, x1, xzr +// adds x23, x17, x8 +// adcs x13, x15, x21 +// adcs x1, x14, x2 +// adc x2, x6, xzr +// add x8, x2, #0x1 +// lsl x2, x8, #32 +// subs x21, x8, x2 +// sbc x2, x2, xzr +// adds x10, x10, x21 +// adcs x5, x5, x2 +// adcs x24, x24, x8 +// adcs x8, x23, xzr +// adcs x21, x13, xzr +// adcs x13, x1, xzr +// csetm x1, cc +// mov x2, #0xffffffff +// and x2, x2, x1 +// adds x10, x10, x2 +// eor x2, x2, x1 +// adcs x5, x5, x2 +// mov x2, #0xfffffffffffffffe +// and x2, x2, x1 +// adcs x24, x24, x2 +// adcs x8, x8, x1 +// adcs x21, x21, x1 +// adc x2, x13, x1 +// stp x10, x5, [x0] // @slothy:writes=buffer0 +// stp x24, x8, [x0, #16] // @slothy:writes=buffer16 +// stp x21, x2, [x0, #32] // @slothy:writes=buffer32 +// ldp x23, x24, [sp], #16 +// ldp x21, x22, [sp], #16 +// ldp x19, x20, [sp], #16 +// ret +// +// The bash script used for step 2 is as follows: +// +// # Store the assembly instructions except the last 'ret' and +// # callee-register store/loads as, say, 'input.S'. +// export OUTPUTS="[hint_buffer0,hint_buffer16,hint_buffer32]" +// export RESERVED_REGS="[x18,x25,x26,x27,x28,x29,x30,sp,q8,q9,q10,q11,q12,q13,q14,q15,v8,v9,v10,v11,v12,v13,v14,v15]" +// /tools/external/slothy.sh input.S my_out_dir +// # my_out_dir/3.opt.s is the optimized assembly. Its output may differ +// # from this file since the sequence is non-deterministically chosen. +// # Please add 'ret' at the end of the output assembly. + + +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384_neon) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384_neon) + .text + .balign 4 + +S2N_BN_SYMBOL(bignum_montmul_p384_neon): + +// Save some registers + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + + ldr q3, [x1] + ldr q25, [x2] + ldp x13, x23, [x2] + ldp x3, x21, [x1] + rev64 v23.4S, v25.4S + uzp1 v17.4S, v25.4S, v3.4S + umulh x15, x3, x13 + mul v6.4S, v23.4S, v3.4S + uzp1 v3.4S, v3.4S, v3.4S + ldr q27, [x2, #32] + ldp x8, x24, [x1, #16] + subs x6, x3, x21 + ldr q0, [x1, #32] + movi v23.2D, #0x00000000ffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4S, v27.4S + uzp2 v25.4S, v27.4S, v27.4S + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2S, v0.2D + xtn v24.2S, v27.2D + cneg x20, x7, cc + ldp x6, x14, [x2, #16] + mul v27.4S, v4.4S, v0.4S + uaddlp v20.2D, v6.4S + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4S, v0.4S, v0.4S + umull v21.2D, v22.2S, v25.2S + shl v0.2D, v20.2D, #32 + umlal v0.2D, v3.2S, v17.2S + mul x22, x8, x6 + umull v1.2D, v6.2S, v25.2S + subs x12, x3, x8 + umull v20.2D, v22.2S, v24.2S + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2D, v20.2D, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2D, v21.2D, #32 + adds x22, x15, x7 + and v26.16B, v21.16B, v23.16B + adcs x16, x12, x15 + uaddlp v25.2D, v27.4S + adcs x9, x19, x12 + umlal v26.2D, v6.2S, v24.2S + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2D, v25.2D, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2D, v22.2S, v24.2S + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2D, v26.2D, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x2, #32] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x1, #32] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [x0] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [x0, #16] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [x0, #32] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [x0] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [x0, #16] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [x0, #32] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [x0] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [x0, #16] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [x0, #32] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [x0] + ldp x21, x12, [x0, #16] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [x0, #32] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [x0] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [x0, #16] + adc x12, x15, x23 + stp x21, x12, [x0, #32] + +// Restore registers and return + + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/arm/p384/bignum_montsqr_p384_neon.S b/arm/p384/bignum_montsqr_p384_neon.S new file mode 100644 index 000000000..9be6380eb --- /dev/null +++ b/arm/p384/bignum_montsqr_p384_neon.S @@ -0,0 +1,665 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery square, z := (x^2 / 2^384) mod p_384 +// Input x[6]; output z[6] +// +// extern void bignum_montsqr_p384_neon +// (uint64_t z[static 6], uint64_t x[static 6]); +// +// Does z := (x^2 / 2^384) mod p_384, assuming x^2 <= 2^384 * p_384, which is +// guaranteed in particular if x < p_384 initially (the "intended" case). +// +// Standard ARM ABI: X0 = z, X1 = x +// ---------------------------------------------------------------------------- + +// bignum_montsqr_p384_neon is functionally equivalent to bignum_montsqr_p384. +// It is written in a way that +// 1. A subset of scalar multiplications in bignum_montsqr_p384 are carefully +// chosen and vectorized +// 2. The vectorized assembly is rescheduled using the SLOTHY superoptimizer. +// https://github.com/slothy-optimizer/slothy +// +// The output program of step 1. is as follows: +// +// ldp x9, x2, [x1] +// ldr q18, [x1] +// ldr q19, [x1] +// ldp x4, x6, [x1, #16] +// ldp x5, x10, [x1, #32] +// ldr q21, [x1, #32] +// ldr q28, [x1, #32] +// mul x12, x9, x2 +// mul x1, x9, x4 +// mul x13, x2, x4 +// movi v0.2D, #0x00000000ffffffff +// uzp2 v5.4S, v19.4S, v19.4S +// xtn v25.2S, v18.2D +// xtn v4.2S, v19.2D +// rev64 v23.4S, v19.4S +// umull v20.2D, v25.2S, v4.2S +// umull v30.2D, v25.2S, v5.2S +// uzp2 v19.4S, v18.4S, v18.4S +// mul v22.4S, v23.4S, v18.4S +// usra v30.2D, v20.2D, #32 +// umull v18.2D, v19.2S, v5.2S +// uaddlp v22.2D, v22.4S +// and v20.16B, v30.16B, v0.16B +// umlal v20.2D, v19.2S, v4.2S +// shl v19.2D, v22.2D, #32 +// usra v18.2D, v30.2D, #32 +// umlal v19.2D, v25.2S, v4.2S +// usra v18.2D, v20.2D, #32 +// mov x7, v19.d[0] +// mov x17, v19.d[1] +// mul x16, x4, x4 +// umulh x3, x9, x2 +// adds x15, x1, x3 +// umulh x1, x9, x4 +// adcs x13, x13, x1 +// umulh x1, x2, x4 +// adcs x8, x1, xzr +// mov x11, v18.d[0] +// mov x14, v18.d[1] +// umulh x1, x4, x4 +// adds x3, x12, x12 +// adcs x15, x15, x15 +// adcs x13, x13, x13 +// adcs x12, x8, x8 +// adc x1, x1, xzr +// adds x11, x11, x3 +// adcs x3, x17, x15 +// adcs x17, x14, x13 +// adcs x15, x16, x12 +// adc x13, x1, xzr +// lsl x1, x7, #32 +// add x16, x1, x7 +// lsr x1, x16, #32 +// subs x12, x1, x16 +// sbc x1, x16, xzr +// extr x12, x1, x12, #32 +// lsr x1, x1, #32 +// adds x7, x1, x16 +// adc x1, xzr, xzr +// subs x12, x11, x12 +// sbcs x11, x3, x7 +// sbcs x17, x17, x1 +// sbcs x15, x15, xzr +// sbcs x13, x13, xzr +// sbc x3, x16, xzr +// lsl x1, x12, #32 +// add x16, x1, x12 +// lsr x1, x16, #32 +// subs x12, x1, x16 +// sbc x1, x16, xzr +// extr x12, x1, x12, #32 +// lsr x1, x1, #32 +// adds x7, x1, x16 +// adc x1, xzr, xzr +// subs x12, x11, x12 +// sbcs x17, x17, x7 +// sbcs x15, x15, x1 +// sbcs x13, x13, xzr +// sbcs x11, x3, xzr +// sbc x3, x16, xzr +// lsl x1, x12, #32 +// add x16, x1, x12 +// lsr x1, x16, #32 +// subs x12, x1, x16 +// sbc x1, x16, xzr +// extr x7, x1, x12, #32 +// lsr x1, x1, #32 +// adds x12, x1, x16 +// adc x1, xzr, xzr +// subs x17, x17, x7 +// sbcs x15, x15, x12 +// sbcs x13, x13, x1 +// sbcs x7, x11, xzr +// sbcs x12, x3, xzr +// sbc x1, x16, xzr +// stp x17, x15, [x0] // @slothy:writes=buffer0 +// stp x13, x7, [x0, #16] // @slothy:writes=buffer16 +// stp x12, x1, [x0, #32] // @slothy:writes=buffer32 +// mul x14, x9, x6 +// mul x15, x2, x5 +// mul x13, x4, x10 +// umulh x7, x9, x6 +// umulh x12, x2, x5 +// umulh x1, x4, x10 +// adds x15, x7, x15 +// adcs x16, x12, x13 +// adc x13, x1, xzr +// adds x11, x15, x14 +// adcs x7, x16, x15 +// adcs x12, x13, x16 +// adc x1, x13, xzr +// adds x17, x7, x14 +// adcs x15, x12, x15 +// adcs x3, x1, x16 +// adc x16, x13, xzr +// subs x1, x9, x2 +// cneg x13, x1, cc +// csetm x7, cc +// subs x1, x5, x6 +// cneg x1, x1, cc +// mul x12, x13, x1 +// umulh x1, x13, x1 +// cinv x7, x7, cc +// eor x12, x12, x7 +// eor x1, x1, x7 +// cmn x7, #0x1 +// adcs x11, x11, x12 +// adcs x17, x17, x1 +// adcs x15, x15, x7 +// adcs x3, x3, x7 +// adc x16, x16, x7 +// subs x9, x9, x4 +// cneg x13, x9, cc +// csetm x7, cc +// subs x1, x10, x6 +// cneg x1, x1, cc +// mul x12, x13, x1 +// umulh x1, x13, x1 +// cinv x7, x7, cc +// eor x12, x12, x7 +// eor x1, x1, x7 +// cmn x7, #0x1 +// adcs x17, x17, x12 +// adcs x15, x15, x1 +// adcs x13, x3, x7 +// adc x7, x16, x7 +// subs x2, x2, x4 +// cneg x12, x2, cc +// csetm x1, cc +// subs x2, x10, x5 +// cneg x2, x2, cc +// mul x4, x12, x2 +// umulh x2, x12, x2 +// cinv x1, x1, cc +// eor x4, x4, x1 +// eor x2, x2, x1 +// cmn x1, #0x1 +// adcs x12, x15, x4 +// adcs x4, x13, x2 +// adc x2, x7, x1 +// adds x1, x14, x14 +// adcs x16, x11, x11 +// adcs x17, x17, x17 +// adcs x15, x12, x12 +// adcs x13, x4, x4 +// adcs x7, x2, x2 +// adc x12, xzr, xzr +// ldp x4, x2, [x0] // @slothy:reads=buffer0 +// adds x1, x1, x4 +// adcs x16, x16, x2 +// ldp x4, x2, [x0, #16] // @slothy:reads=buffer16 +// adcs x17, x17, x4 +// adcs x15, x15, x2 +// ldp x4, x2, [x0, #32] // @slothy:reads=buffer32 +// adcs x13, x13, x4 +// adcs x7, x7, x2 +// adc x11, x12, xzr +// lsl x2, x1, #32 +// add x12, x2, x1 +// lsr x2, x12, #32 +// subs x4, x2, x12 +// sbc x2, x12, xzr +// extr x4, x2, x4, #32 +// lsr x2, x2, #32 +// adds x1, x2, x12 +// adc x2, xzr, xzr +// subs x4, x16, x4 +// sbcs x16, x17, x1 +// sbcs x17, x15, x2 +// sbcs x15, x13, xzr +// sbcs x13, x7, xzr +// sbc x7, x12, xzr +// lsl x2, x4, #32 +// add x12, x2, x4 +// lsr x2, x12, #32 +// subs x4, x2, x12 +// sbc x2, x12, xzr +// extr x4, x2, x4, #32 +// lsr x2, x2, #32 +// adds x1, x2, x12 +// adc x2, xzr, xzr +// subs x4, x16, x4 +// sbcs x16, x17, x1 +// sbcs x17, x15, x2 +// sbcs x15, x13, xzr +// sbcs x13, x7, xzr +// sbc x7, x12, xzr +// lsl x2, x4, #32 +// add x12, x2, x4 +// lsr x2, x12, #32 +// subs x4, x2, x12 +// sbc x2, x12, xzr +// extr x1, x2, x4, #32 +// lsr x2, x2, #32 +// adds x4, x2, x12 +// adc x2, xzr, xzr +// subs x3, x16, x1 +// sbcs x17, x17, x4 +// sbcs x15, x15, x2 +// sbcs x1, x13, xzr +// sbcs x4, x7, xzr +// sbc x2, x12, xzr +// adds x13, x11, x1 +// adcs x7, x4, xzr +// adcs x12, x2, xzr +// adcs x16, xzr, xzr +// mul x2, x6, x6 +// adds x3, x3, x2 +// xtn v30.2S, v28.2D +// shrn v26.2S, v28.2D, #32 +// umull v26.2D, v30.2S, v26.2S +// shl v19.2D, v26.2D, #33 +// umlal v19.2D, v30.2S, v30.2S +// mov x1, v19.d[0] +// mov x4, v19.d[1] +// umulh x2, x6, x6 +// adcs x17, x17, x2 +// umulh x2, x5, x5 +// adcs x15, x15, x1 +// adcs x13, x13, x2 +// umulh x2, x10, x10 +// adcs x7, x7, x4 +// adcs x12, x12, x2 +// adc x16, x16, xzr +// dup v28.2D, x6 +// movi v0.2D, #0x00000000ffffffff +// uzp2 v5.4S, v21.4S, v21.4S +// xtn v25.2S, v28.2D +// xtn v4.2S, v21.2D +// rev64 v19.4S, v21.4S +// umull v30.2D, v25.2S, v4.2S +// umull v23.2D, v25.2S, v5.2S +// uzp2 v20.4S, v28.4S, v28.4S +// mul v19.4S, v19.4S, v28.4S +// usra v23.2D, v30.2D, #32 +// umull v18.2D, v20.2S, v5.2S +// uaddlp v19.2D, v19.4S +// and v30.16B, v23.16B, v0.16B +// umlal v30.2D, v20.2S, v4.2S +// shl v19.2D, v19.2D, #32 +// usra v18.2D, v23.2D, #32 +// umlal v19.2D, v25.2S, v4.2S +// usra v18.2D, v30.2D, #32 +// mov x6, v19.d[0] +// mov x1, v19.d[1] +// mul x4, x5, x10 +// mov x2, v18.d[0] +// adds x1, x1, x2 +// mov x2, v18.d[1] +// adcs x4, x4, x2 +// umulh x5, x5, x10 +// adc x2, x5, xzr +// adds x5, x6, x6 +// adcs x6, x1, x1 +// adcs x1, x4, x4 +// adcs x4, x2, x2 +// adc x2, xzr, xzr +// adds x17, x17, x5 +// adcs x15, x15, x6 +// adcs x13, x13, x1 +// adcs x7, x7, x4 +// adcs x12, x12, x2 +// adc x2, x16, xzr +// mov x5, #0xffffffff00000001 +// mov x6, #0xffffffff +// mov x1, #0x1 +// cmn x3, x5 +// adcs xzr, x17, x6 +// adcs xzr, x15, x1 +// adcs xzr, x13, xzr +// adcs xzr, x7, xzr +// adcs xzr, x12, xzr +// adc x2, x2, xzr +// neg x4, x2 +// and x2, x5, x4 +// adds x10, x3, x2 +// and x2, x6, x4 +// adcs x5, x17, x2 +// and x2, x1, x4 +// adcs x6, x15, x2 +// adcs x1, x13, xzr +// adcs x4, x7, xzr +// adc x2, x12, xzr +// stp x10, x5, [x0] // @slothy:writes=buffer0 +// stp x6, x1, [x0, #16] // @slothy:writes=buffer16 +// stp x4, x2, [x0, #32] // @slothy:writes=buffer32 +// ret +// +// The bash script used for step 2 is as follows: +// +// # Store the assembly instructions except the last 'ret' as, say, 'input.S'. +// export OUTPUTS="[hint_buffer0,hint_buffer16,hint_buffer32]" +// export RESERVED_REGS="[x18,x19,x20,x21,x22,x23,x24,x25,x26,x27,x28,x29,x30,sp,q8,q9,q10,q11,q12,q13,q14,q15,v8,v9,v10,v11,v12,v13,v14,v15]" +// /tools/external/slothy.sh input.S my_out_dir +// # my_out_dir/3.opt.s is the optimized assembly. Its output may differ +// # from this file since the sequence is non-deterministically chosen. +// # Please add 'ret' at the end of the output assembly. + + +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384_neon) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384_neon) + .text + .balign 4 + +S2N_BN_SYMBOL(bignum_montsqr_p384_neon): + + ldr q1, [x1] + ldp x9, x2, [x1] + ldr q0, [x1] + ldp x4, x6, [x1, #16] + rev64 v21.4S, v1.4S + uzp2 v28.4S, v1.4S, v1.4S + umulh x7, x9, x2 + xtn v17.2S, v1.2D + mul v27.4S, v21.4S, v0.4S + ldr q20, [x1, #32] + xtn v30.2S, v0.2D + ldr q1, [x1, #32] + uzp2 v31.4S, v0.4S, v0.4S + ldp x5, x10, [x1, #32] + umulh x8, x9, x4 + uaddlp v3.2D, v27.4S + umull v16.2D, v30.2S, v17.2S + mul x16, x9, x4 + umull v27.2D, v30.2S, v28.2S + shrn v0.2S, v20.2D, #32 + xtn v7.2S, v20.2D + shl v20.2D, v3.2D, #32 + umull v3.2D, v31.2S, v28.2S + mul x3, x2, x4 + umlal v20.2D, v30.2S, v17.2S + umull v22.2D, v7.2S, v0.2S + usra v27.2D, v16.2D, #32 + umulh x11, x2, x4 + movi v21.2D, #0x00000000ffffffff + uzp2 v28.4S, v1.4S, v1.4S + adds x15, x16, x7 + and v5.16B, v27.16B, v21.16B + adcs x3, x3, x8 + usra v3.2D, v27.2D, #32 + dup v29.2D, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2D, v31.2S, v17.2S + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2D, v22.2D, #33 + xtn v25.2S, v29.2D + rev64 v31.4S, v1.4S + lsl x13, x14, #32 + uzp2 v6.4S, v29.4S, v29.4S + umlal v19.2D, v7.2S, v7.2S + usra v3.2D, v5.2D, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4S, v31.4S, v29.4S + xtn v4.2S, v1.2D + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2D, v25.2S, v28.2S + adcs x11, x16, x16 + umull v21.2D, v25.2S, v4.2S + mov x17, v3.d[0] + umull v18.2D, v6.2S, v28.2S + adc x16, x8, xzr + uaddlp v16.2D, v17.4S + movi v1.2D, #0x00000000ffffffff + subs x13, x13, x12 + usra v31.2D, v21.2D, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2D, v16.2D, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16B, v31.16B, v1.16B + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2D, v6.2S, v4.2S + usra v18.2D, v31.2D, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2D, v25.2S, v4.2S + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2D, v3.2D, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [x0] // @slothy:writes=buffer0 + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [x0, #16] // @slothy:writes=buffer16 + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [x0, #32] // @slothy:writes=buffer32 + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [x0, #16] // @slothy:reads=buffer16 + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [x0] // @slothy:reads=buffer0 + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [x0, #32] // @slothy:reads=buffer32 + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [x0] // @slothy:writes=buffer0 + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [x0, #16] // @slothy:writes=buffer16 + adc x17, x14, xzr + stp x2, x17, [x0, #32] // depth 72 // @slothy:writes=buffer32 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/arm/proofs/arm.ml b/arm/proofs/arm.ml index 0b1b3ab47..fcebefe8b 100644 --- a/arm/proofs/arm.ml +++ b/arm/proofs/arm.ml @@ -913,6 +913,69 @@ let ARM_ADD_RETURN_STACK_TAC = ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN REWRITE_TAC[dqd_thm];; +(* ------------------------------------------------------------------------- *) +(* Version with a program defined as SUB_LIST of a bigger program. *) +(* ------------------------------------------------------------------------- *) + +(* Given a goal which is + `! pc. + + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) program_mc /\ + read PC s = word (pc + begin_ofs) /\ (s)) + (\s. read PC s = word (pc + (begin_ofs + n) /\ + (s)) + ()`, + prove it using correct_th which is + `|- ! pc. + + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) program_sub_mc /\ + read PC s = word pc /\ (s)) + (\s. read PC s = word (pc + n) /\ + (s)) + ()` + where program_sub_mc is SUB_LIST(begin_ofs,n) program_mc. + execths is the EXEC list of program_mc and program_sub_mc. *) +let ARM_SUB_LIST_OF_MC_TAC (correct_th:thm) (program_sub_mc_def:thm) + (execths:thm list): tactic = + W (fun (asl,g) -> + let vars,pc = + let xs = fst (strip_forall g) in + butlast xs, last xs in + let begin_ofs,n = + let rhs = snd (dest_eq (concl program_sub_mc_def)) in + dest_pair (rand(rator rhs)) in + if !arm_print_log then begin + Printf.printf "ARM_SUB_LIST_OF_MC_TAC: begin_ofs: %s, n: %s\n" + (string_of_term begin_ofs) (string_of_term n); + Printf.printf "\tvars: %s, pc: %s\n" + (String.concat "," (map string_of_term vars)) + (string_of_term pc) + end else (); + REPEAT STRIP_TAC THEN + MP_TAC (ISPECL (vars @ [mk_binary "+" (pc,begin_ofs)]) correct_th) THEN + (* Prove antedecent of correct_th *) + ANTS_TAC THENL [ + POP_ASSUM MP_TAC THEN + REWRITE_TAC(execths @ [ALL;NONOVERLAPPING_CLAUSES]) THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN NONOVERLAPPING_TAC; + ALL_TAC + ] THEN + + MATCH_MP_TAC ENSURES_SUBLEMMA_THM THEN + REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + REPEAT STRIP_TAC THEN ASM_REWRITE_TAC[ADD_0] THEN + REWRITE_TAC[program_sub_mc_def;WORD_ADD] THEN + IMP_REWRITE_TAC(CONJUNCTS ALIGNED_BYTES_LOADED_SUB_LIST) THEN + CONV_TAC NUM_DIVIDES_CONV; + + SUBSUMED_MAYCHANGE_TAC; + + MESON_TAC[ADD_ASSOC;ADD_0] + ]);; + (* ------------------------------------------------------------------------- *) (* Handling more general branch targets *) (* ------------------------------------------------------------------------- *) diff --git a/arm/proofs/bignum_montmul_p256.ml b/arm/proofs/bignum_montmul_p256.ml index e65ff29cf..4f77cba48 100644 --- a/arm/proofs/bignum_montmul_p256.ml +++ b/arm/proofs/bignum_montmul_p256.ml @@ -244,14 +244,14 @@ let p256shortredlemma = prove let BIGNUM_MONTMUL_P256_CORE_CORRECT = time prove (`!z x y a b pc. - nonoverlapping (word pc,0x2c0) (z,8 * 4) + nonoverlapping (word pc,LENGTH bignum_montmul_p256_core_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_core_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,4) s = a /\ bignum_from_memory (y,4) s = b) - (\s. read PC s = word (pc + 0x2bc) /\ + (\s. read PC s = word (pc + LENGTH bignum_montmul_p256_core_mc) /\ (a * b <= 2 EXP 256 * p_256 ==> bignum_from_memory (z,4) s = (inverse_mod p_256 (2 EXP 256) * a * b) MOD p_256)) @@ -261,7 +261,8 @@ let BIGNUM_MONTMUL_P256_CORE_CORRECT = time prove MAYCHANGE SOME_FLAGS)`, MAP_EVERY X_GEN_TAC [`z:int64`; `x:int64`; `y:int64`; `a:num`; `b:num`; `pc:num`] THEN - REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P256_CORE_EXEC] THEN DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN (*** Globalize the a * b <= 2 EXP 256 * p_256 assumption ***) @@ -675,14 +676,14 @@ let BIGNUM_MONTMUL_P256_CORE_CORRECT = time prove let BIGNUM_MONTMUL_P256_CORRECT = time prove (`!z x y a b pc. - nonoverlapping (word pc,0x2c0) (z,8 * 4) + nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,4) s = a /\ bignum_from_memory (y,4) s = b) - (\s. read PC s = word (pc + 0x2bc) /\ + (\s. read PC s = word (pc + LENGTH bignum_montmul_p256_core_mc) /\ (a * b <= 2 EXP 256 * p_256 ==> bignum_from_memory (z,4) s = (inverse_mod p_256 (2 EXP 256) * a * b) MOD p_256)) @@ -690,16 +691,9 @@ let BIGNUM_MONTMUL_P256_CORRECT = time prove X13; X14; X15; X16; X17] ,, MAYCHANGE [memory :> bytes(z,8 * 4)] ,, MAYCHANGE SOME_FLAGS)`, - REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_MONTMUL_P256_CORE_CORRECT th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - MAP_EVERY EXISTS_TAC [`x:int64`;`y:int64`] THEN ASM_REWRITE_TAC[] THEN - REWRITE_TAC[bignum_montmul_p256_core_mc_def;BIGNUM_MONTMUL_P256_EXEC; - WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN - ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + ARM_SUB_LIST_OF_MC_TAC BIGNUM_MONTMUL_P256_CORE_CORRECT + bignum_montmul_p256_core_mc_def + [BIGNUM_MONTMUL_P256_CORE_EXEC;BIGNUM_MONTMUL_P256_EXEC]);; let BIGNUM_MONTMUL_P256_SUBROUTINE_CORRECT = time prove (`!z x y a b pc returnaddress. @@ -718,7 +712,8 @@ let BIGNUM_MONTMUL_P256_SUBROUTINE_CORRECT = time prove (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 4)])`, ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTMUL_P256_EXEC - BIGNUM_MONTMUL_P256_CORRECT);; + (REWRITE_RULE [BIGNUM_MONTMUL_P256_EXEC;BIGNUM_MONTMUL_P256_CORE_EXEC] + BIGNUM_MONTMUL_P256_CORRECT));; (* ------------------------------------------------------------------------- *) (* Show that it also works as "almost-Montgomery" if desired. That is, even *) @@ -736,14 +731,14 @@ let p256genshortredlemma = prove let BIGNUM_AMONTMUL_P256_CORE_CORRECT = time prove (`!z x y a b pc. - nonoverlapping (word pc,0x2c0) (z,8 * 4) + nonoverlapping (word pc,LENGTH bignum_montmul_p256_core_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_core_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,4) s = a /\ bignum_from_memory (y,4) s = b) - (\s. read PC s = word (pc + 0x2bc) /\ + (\s. read PC s = word (pc + LENGTH bignum_montmul_p256_core_mc) /\ (bignum_from_memory (z,4) s == inverse_mod p_256 (2 EXP 256) * a * b) (mod p_256)) (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; @@ -752,7 +747,8 @@ let BIGNUM_AMONTMUL_P256_CORE_CORRECT = time prove MAYCHANGE SOME_FLAGS)`, MAP_EVERY X_GEN_TAC [`z:int64`; `x:int64`; `y:int64`; `a:num`; `b:num`; `pc:num`] THEN - REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P256_CORE_EXEC] THEN DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN ENSURES_INIT_TAC "s0" THEN BIGNUM_DIGITIZE_TAC "x_" `bignum_from_memory (x,4) s0` THEN @@ -1161,30 +1157,23 @@ let BIGNUM_AMONTMUL_P256_CORE_CORRECT = time prove let BIGNUM_AMONTMUL_P256_CORRECT = time prove (`!z x y a b pc. - nonoverlapping (word pc,0x2c0) (z,8 * 4) + nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p256_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,4) s = a /\ bignum_from_memory (y,4) s = b) - (\s. read PC s = word (pc + 0x2bc) /\ + (\s. read PC s = word (pc + LENGTH bignum_montmul_p256_core_mc) /\ (bignum_from_memory (z,4) s == inverse_mod p_256 (2 EXP 256) * a * b) (mod p_256)) (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; X13; X14; X15; X16; X17] ,, MAYCHANGE [memory :> bytes(z,8 * 4)] ,, MAYCHANGE SOME_FLAGS)`, - REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_AMONTMUL_P256_CORE_CORRECT th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - MAP_EVERY EXISTS_TAC [`x:int64`;`y:int64`] THEN ASM_REWRITE_TAC[] THEN - REWRITE_TAC[bignum_montmul_p256_core_mc_def;BIGNUM_MONTMUL_P256_EXEC; - WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN - ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + ARM_SUB_LIST_OF_MC_TAC BIGNUM_AMONTMUL_P256_CORE_CORRECT + bignum_montmul_p256_core_mc_def + [BIGNUM_MONTMUL_P256_CORE_EXEC;BIGNUM_MONTMUL_P256_EXEC]);; let BIGNUM_AMONTMUL_P256_SUBROUTINE_CORRECT = time prove (`!z x y a b pc returnaddress. @@ -1202,4 +1191,5 @@ let BIGNUM_AMONTMUL_P256_SUBROUTINE_CORRECT = time prove (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 4)])`, ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTMUL_P256_EXEC - BIGNUM_AMONTMUL_P256_CORRECT);; + (REWRITE_RULE[BIGNUM_MONTMUL_P256_CORE_EXEC;BIGNUM_MONTMUL_P256_EXEC] + BIGNUM_AMONTMUL_P256_CORRECT));; diff --git a/arm/proofs/bignum_montmul_p256_neon.ml b/arm/proofs/bignum_montmul_p256_neon.ml index 7d62cc00b..4a2fa38ef 100644 --- a/arm/proofs/bignum_montmul_p256_neon.ml +++ b/arm/proofs/bignum_montmul_p256_neon.ml @@ -281,8 +281,8 @@ let actions = [ let equiv_goal1 = mk_equiv_statement `ALL (nonoverlapping (z:int64,8 * 4)) - [(word pc:int64,LENGTH bignum_montmul_p256_mc); - (word pc2:int64,LENGTH bignum_montmul_p256_interm1_mc)]` + [(word pc:int64,LENGTH bignum_montmul_p256_core_mc); + (word pc2:int64,LENGTH bignum_montmul_p256_interm1_core_mc)]` equiv_input_states equiv_output_states bignum_montmul_p256_core_mc 0 @@ -301,13 +301,11 @@ extra_word_CONV := [GEN_REWRITE_CONV I [WORD_BITMANIP_SIMP_LEMMAS; WORD_MUL64_LO; WORD_MUL64_HI]] @ (!extra_word_CONV);; -let BIGNUM_MONTMUL_P256_EQUIV1 = prove(equiv_goal1, +let BIGNUM_MONTMUL_P256_CORE_EQUIV1 = prove(equiv_goal1, REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; - BIGNUM_MONTMUL_P256_EXEC; BIGNUM_MONTMUL_P256_CORE_EXEC; - BIGNUM_MONTMUL_P256_INTERM1_EXEC; BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC] THEN REPEAT STRIP_TAC THEN (** Initialize **) @@ -367,8 +365,8 @@ let bignum_montmul_p256_neon_core_mc_def, let equiv_goal2 = mk_equiv_statement `ALL (nonoverlapping (z:int64,8 * 4)) - [(word pc:int64,LENGTH bignum_montmul_p256_interm1_mc); - (word pc2:int64,LENGTH bignum_montmul_p256_neon_mc)]` + [(word pc:int64,LENGTH bignum_montmul_p256_interm1_core_mc); + (word pc2:int64,LENGTH bignum_montmul_p256_neon_core_mc)]` equiv_input_states equiv_output_states bignum_montmul_p256_interm1_core_mc 0 @@ -390,14 +388,12 @@ let inst_map = [ (* (state number, (equation, fresh var)) *) let state_to_abbrevs: (int * thm) list ref = ref [];; -let BIGNUM_MONTMUL_P256_EQUIV2 = prove( +let BIGNUM_MONTMUL_P256_CORE_EQUIV2 = prove( equiv_goal2, REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; - BIGNUM_MONTMUL_P256_INTERM1_EXEC; BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC; - BIGNUM_MONTMUL_P256_NEON_EXEC; BIGNUM_MONTMUL_P256_NEON_CORE_EXEC] THEN REPEAT STRIP_TAC THEN (** Initialize **) @@ -442,8 +438,8 @@ let BIGNUM_MONTMUL_P256_EQUIV2 = prove( let equiv_goal = mk_equiv_statement `ALL (nonoverlapping (z:int64,8 * 4)) - [(word pc:int64,LENGTH bignum_montmul_p256_mc); - (word pc2:int64,LENGTH bignum_montmul_p256_neon_mc)]` + [(word pc:int64,LENGTH bignum_montmul_p256_core_mc); + (word pc2:int64,LENGTH bignum_montmul_p256_neon_core_mc)]` equiv_input_states equiv_output_states bignum_montmul_p256_core_mc 0 @@ -461,28 +457,28 @@ let equiv_output_states_TRANS = prove( ==> equiv_output_states (s,s2) z`, MESON_TAC[equiv_output_states]);; -let BIGNUM_MONTMUL_P256_EQUIV = prove(equiv_goal, +let BIGNUM_MONTMUL_P256_CORE_EQUIV = prove(equiv_goal, REPEAT STRIP_TAC THEN SUBGOAL_THEN `?pc3. ALL (nonoverlapping (z,8 * 4)) - [word pc:int64, LENGTH bignum_montmul_p256_mc; - word pc3:int64, LENGTH bignum_montmul_p256_interm1_mc] /\ + [word pc:int64, LENGTH bignum_montmul_p256_core_mc; + word pc3:int64, LENGTH bignum_montmul_p256_interm1_core_mc] /\ ALL (nonoverlapping (z,8 * 4)) - [word pc3:int64, LENGTH bignum_montmul_p256_interm1_mc; - word pc2:int64, LENGTH bignum_montmul_p256_neon_mc] /\ + [word pc3:int64, LENGTH bignum_montmul_p256_interm1_core_mc; + word pc2:int64, LENGTH bignum_montmul_p256_neon_core_mc] /\ ALL (nonoverlapping - (word pc3:int64, LENGTH bignum_montmul_p256_interm1_mc)) + (word pc3:int64, LENGTH bignum_montmul_p256_interm1_core_mc)) [x,8 * 4; y,8 * 4] /\ 4 divides val (word pc3:int64)` MP_TAC THENL [ FIRST_X_ASSUM MP_TAC THEN - CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN ASM_REWRITE_TAC [ALL;NONOVERLAPPING_CLAUSES; - BIGNUM_MONTMUL_P256_INTERM1_EXEC;BIGNUM_MONTMUL_P256_NEON_EXEC; - BIGNUM_MONTMUL_P256_EXEC;GSYM CONJ_ASSOC] THEN + BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC; + BIGNUM_MONTMUL_P256_NEON_CORE_EXEC; + BIGNUM_MONTMUL_P256_CORE_EXEC;GSYM CONJ_ASSOC] THEN STRIP_TAC THEN ASM_REWRITE_TAC[] THEN FIND_HOLE_TAC; @@ -491,8 +487,8 @@ let BIGNUM_MONTMUL_P256_EQUIV = prove(equiv_goal, ] THEN STRIP_TAC THEN - FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTMUL_P256_EQUIV1 th))) THEN - FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTMUL_P256_EQUIV2 th))) THEN + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTMUL_P256_CORE_EQUIV1 th))) THEN + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTMUL_P256_CORE_EQUIV2 th))) THEN FIRST_X_ASSUM (fun c1 -> FIRST_X_ASSUM (fun c2 -> MP_TAC (REWRITE_RULE [] (MATCH_MP ENSURES2_CONJ2 (CONJ c1 c2))) @@ -501,9 +497,9 @@ let BIGNUM_MONTMUL_P256_EQUIV = prove(equiv_goal, (* break 'ALL nonoverlapping' in assumptions *) RULE_ASSUM_TAC (REWRITE_RULE[ ALLPAIRS;ALL; - BIGNUM_MONTMUL_P256_EXEC; - BIGNUM_MONTMUL_P256_NEON_EXEC; - BIGNUM_MONTMUL_P256_INTERM1_EXEC; + BIGNUM_MONTMUL_P256_CORE_EXEC; + BIGNUM_MONTMUL_P256_NEON_CORE_EXEC; + BIGNUM_MONTMUL_P256_INTERM1_CORE_EXEC; NONOVERLAPPING_CLAUSES]) THEN SPLIT_FIRST_CONJ_ASSUM_TAC THEN @@ -542,16 +538,24 @@ let BIGNUM_MONTMUL_P256_EQUIV = prove(equiv_goal, +(****************************************************************************** + Inducing BIGNUM_MONTMUL_P256_NEON_SUBROUTINE_CORRECT + from BIGNUM_MONTMUL_P256_CORE_CORRECT +******************************************************************************) + let event_n_at_pc_goal = mk_eventually_n_at_pc_statement - `nonoverlapping (word pc:int64,LENGTH bignum_montmul_p256_mc) - (z:int64,8 * 4)` - [`z:int64`;`x:int64`;`y:int64`] (*pc_mc_ofs*)0 bignum_montmul_p256_core_mc (*pc_ofs*)0 + `nonoverlapping + (word pc:int64, + LENGTH (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes)) + (z:int64,8 * 4)` + [`z:int64`;`x:int64`;`y:int64`] (*pc_mc_ofs*)0 + bignum_montmul_p256_core_mc (*pc_ofs*)0 `\s0. C_ARGUMENTS [z;x;y] s0`;; let BIGNUM_MONTMUL_P256_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, REWRITE_TAC[LENGTH_APPEND;BIGNUM_MONTMUL_P256_CORE_EXEC; - BIGNUM_MONTMUL_P256_EXEC;BARRIER_INST_BYTES_LENGTH] THEN + BIGNUM_MONTMUL_P256_CORE_EXEC;BARRIER_INST_BYTES_LENGTH] THEN REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN SUBGOAL_THEN `4 divides (LENGTH bignum_montmul_p256_core_mc)` (fun th -> REWRITE_TAC[MATCH_MP aligned_bytes_loaded_append th; @@ -566,11 +570,6 @@ let BIGNUM_MONTMUL_P256_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC BIGNUM_MONTMUL_P256_CORE_EXEC);; -(****************************************************************************** - Inducing BIGNUM_MONTMUL_P256_NEON_SUBROUTINE_CORRECT - from BIGNUM_MONTMUL_P256_CORE_CORRECT -******************************************************************************) - let BIGNUM_MONTMUL_P256_CORE_CORRECT_N = prove_correct_n BIGNUM_MONTMUL_P256_EXEC @@ -580,15 +579,15 @@ let BIGNUM_MONTMUL_P256_CORE_CORRECT_N = let BIGNUM_MONTMUL_P256_NEON_CORE_CORRECT = prove( - `!z x a pc2. - nonoverlapping (word pc2,LENGTH bignum_montmul_p256_neon_mc) (z,8 * 4) + `!z x y a b pc2. + nonoverlapping (word pc2,LENGTH bignum_montmul_p256_neon_core_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc2) bignum_montmul_p256_neon_core_mc /\ read PC s = word pc2 /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,4) s = a /\ bignum_from_memory (y,4) s = b) - (\s. read PC s = word (pc2 + 816) /\ + (\s. read PC s = word (pc2 + LENGTH bignum_montmul_p256_neon_core_mc) /\ (a * b <= 2 EXP 256 * p_256 ==> bignum_from_memory (z,4) s = (inverse_mod p_256 (2 EXP 256) * a * b) MOD p_256)) @@ -599,12 +598,15 @@ let BIGNUM_MONTMUL_P256_NEON_CORE_CORRECT = prove( (* Prepare pc for the original program. *) SUBGOAL_THEN `?pc. - nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (z:int64,8 * 4) /\ - nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (x:int64,8 * 4) /\ - nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (y:int64,8 * 4) /\ + nonoverlapping (word pc, + LENGTH (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes)) (z:int64,8 * 4) /\ + nonoverlapping (word pc, + LENGTH (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes)) (x:int64,8 * 4) /\ + nonoverlapping (word pc, + LENGTH (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes)) (y:int64,8 * 4) /\ 4 divides val (word pc:int64)` MP_TAC THENL [ - REWRITE_TAC[BIGNUM_MONTMUL_P256_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN - CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + REWRITE_TAC[LENGTH_APPEND;BIGNUM_MONTMUL_P256_CORE_EXEC; + BARRIER_INST_BYTES_LENGTH;NONOVERLAPPING_CLAUSES;ALL] THEN FIND_HOLE_TAC; (** SUBGOAL 2 **) @@ -613,12 +615,12 @@ let BIGNUM_MONTMUL_P256_NEON_CORE_CORRECT = prove( REPEAT_N 2 STRIP_TAC THEN - VCGEN_EQUIV_TAC BIGNUM_MONTMUL_P256_EQUIV BIGNUM_MONTMUL_P256_CORE_CORRECT_N - [BIGNUM_MONTMUL_P256_EXEC;NONOVERLAPPING_CLAUSES] THEN + VCGEN_EQUIV_TAC BIGNUM_MONTMUL_P256_CORE_EQUIV BIGNUM_MONTMUL_P256_CORE_CORRECT_N + [BIGNUM_MONTMUL_P256_CORE_EXEC;BIGNUM_MONTMUL_P256_NEON_CORE_EXEC] THEN (* unfold definitions that may block tactics *) - RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTMUL_P256_EXEC; - BIGNUM_MONTMUL_P256_NEON_EXEC]) THEN + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P256_CORE_EXEC;BIGNUM_MONTMUL_P256_NEON_CORE_EXEC]) THEN REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN REPEAT CONJ_TAC THENL [ (** SUBGOAL 1. Precond **) @@ -643,7 +645,9 @@ let BIGNUM_MONTMUL_P256_NEON_CORE_CORRECT = prove( TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC); (** SUBGOAL 2. Postcond **) - MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES; + BIGNUM_MONTMUL_P256_CORE_EXEC; + BIGNUM_MONTMUL_P256_NEON_CORE_EXEC]; (** SUBGOAL 3. Frame **) MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] @@ -658,22 +662,16 @@ let BIGNUM_MONTMUL_P256_NEON_CORRECT = time prove C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,4) s = a /\ bignum_from_memory (y,4) s = b) - (\s. read PC s = word (pc + 816) /\ + (\s. read PC s = word (pc + LENGTH bignum_montmul_p256_neon_core_mc) /\ (a * b <= 2 EXP 256 * p_256 ==> bignum_from_memory (z,4) s = (inverse_mod p_256 (2 EXP 256) * a * b) MOD p_256)) (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 4)])`, - REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_MONTMUL_P256_NEON_CORE_CORRECT th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - EXISTS_TAC `x:int64` THEN ASM_REWRITE_TAC[] THEN - REWRITE_TAC[bignum_montmul_p256_neon_core_mc_def;BIGNUM_MONTMUL_P256_NEON_EXEC; - WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN - ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_MONTMUL_P256_NEON_CORE_CORRECT + bignum_montmul_p256_neon_core_mc_def + [BIGNUM_MONTMUL_P256_NEON_EXEC;BIGNUM_MONTMUL_P256_NEON_CORE_EXEC]);; let BIGNUM_MONTMUL_P256_NEON_SUBROUTINE_CORRECT = time prove (`!z x y a b pc returnaddress. @@ -693,7 +691,7 @@ let BIGNUM_MONTMUL_P256_NEON_SUBROUTINE_CORRECT = time prove MAYCHANGE [memory :> bytes(z,8 * 4)])`, REWRITE_TAC[BIGNUM_MONTMUL_P256_NEON_EXEC] THEN ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTMUL_P256_NEON_EXEC - (REWRITE_RULE[BIGNUM_MONTMUL_P256_NEON_EXEC] BIGNUM_MONTMUL_P256_NEON_CORRECT));; + (REWRITE_RULE[BIGNUM_MONTMUL_P256_NEON_EXEC;BIGNUM_MONTMUL_P256_NEON_CORE_EXEC] BIGNUM_MONTMUL_P256_NEON_CORRECT));; (****************************************************************************** @@ -710,14 +708,14 @@ let BIGNUM_AMONTMUL_P256_CORE_CORRECT_N = let BIGNUM_AMONTMUL_P256_NEON_CORE_CORRECT = prove( `!z x y a b pc2. - nonoverlapping (word pc2,LENGTH bignum_montmul_p256_neon_mc) (z,8 * 4) + nonoverlapping (word pc2,LENGTH bignum_montmul_p256_neon_core_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc2) bignum_montmul_p256_neon_core_mc /\ read PC s = word pc2 /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,4) s = a /\ bignum_from_memory (y,4) s = b) - (\s. read PC s = word (pc2 + 816) /\ + (\s. read PC s = word (pc2 + LENGTH bignum_montmul_p256_neon_core_mc) /\ (bignum_from_memory (z,4) s == inverse_mod p_256 (2 EXP 256) * a * b) (mod p_256)) (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, @@ -727,12 +725,15 @@ let BIGNUM_AMONTMUL_P256_NEON_CORE_CORRECT = prove( (* Prepare pc for the original program. *) SUBGOAL_THEN `?pc. - nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (z:int64,8 * 4) /\ - nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (x:int64,8 * 4) /\ - nonoverlapping (word pc,LENGTH bignum_montmul_p256_mc) (y:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH + (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes)) (z:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH + (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes)) (x:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH + (APPEND bignum_montmul_p256_core_mc barrier_inst_bytes)) (y:int64,8 * 4) /\ 4 divides val (word pc:int64)` MP_TAC THENL [ - REWRITE_TAC[BIGNUM_MONTMUL_P256_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN - CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + REWRITE_TAC[LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH; + BIGNUM_MONTMUL_P256_CORE_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN FIND_HOLE_TAC; (** SUBGOAL 2 **) @@ -741,8 +742,8 @@ let BIGNUM_AMONTMUL_P256_NEON_CORE_CORRECT = prove( REPEAT_N 2 STRIP_TAC THEN - VCGEN_EQUIV_TAC BIGNUM_MONTMUL_P256_EQUIV BIGNUM_AMONTMUL_P256_CORE_CORRECT_N - [BIGNUM_MONTMUL_P256_EXEC;NONOVERLAPPING_CLAUSES] THEN + VCGEN_EQUIV_TAC BIGNUM_MONTMUL_P256_CORE_EQUIV BIGNUM_AMONTMUL_P256_CORE_CORRECT_N + [BIGNUM_MONTMUL_P256_CORE_EXEC;BIGNUM_MONTMUL_P256_NEON_CORE_EXEC] THEN (* unfold definitions that may block tactics *) RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTMUL_P256_EXEC; @@ -771,7 +772,8 @@ let BIGNUM_AMONTMUL_P256_NEON_CORE_CORRECT = prove( TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P256_CORE_EXEC); (** SUBGOAL 2. Postcond **) - MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES; + BIGNUM_MONTMUL_P256_CORE_EXEC;BIGNUM_MONTMUL_P256_NEON_CORE_EXEC]; (** SUBGOAL 3. Frame **) MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] @@ -786,22 +788,15 @@ let BIGNUM_AMONTMUL_P256_NEON_CORRECT = time prove C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,4) s = a /\ bignum_from_memory (y,4) s = b) - (\s. read PC s = word (pc + 816) /\ + (\s. read PC s = word (pc + LENGTH bignum_montmul_p256_neon_core_mc) /\ (bignum_from_memory (z,4) s == inverse_mod p_256 (2 EXP 256) * a * b) (mod p_256)) (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 4)])`, - REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_AMONTMUL_P256_NEON_CORE_CORRECT th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - MAP_EVERY EXISTS_TAC [`x:int64`;`y:int64`] THEN - ASM_REWRITE_TAC[] THEN - REWRITE_TAC[bignum_montmul_p256_neon_core_mc_def;BIGNUM_MONTMUL_P256_NEON_EXEC; - WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN - ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_AMONTMUL_P256_NEON_CORE_CORRECT + bignum_montmul_p256_neon_core_mc_def + [BIGNUM_MONTMUL_P256_NEON_EXEC;BIGNUM_MONTMUL_P256_NEON_CORE_EXEC]);; let BIGNUM_AMONTMUL_P256_NEON_SUBROUTINE_CORRECT = time prove (`!z x y a b pc returnaddress. @@ -820,5 +815,5 @@ let BIGNUM_AMONTMUL_P256_NEON_SUBROUTINE_CORRECT = time prove MAYCHANGE [memory :> bytes(z,8 * 4)])`, REWRITE_TAC[BIGNUM_MONTMUL_P256_NEON_EXEC] THEN ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTMUL_P256_NEON_EXEC - (REWRITE_RULE[BIGNUM_MONTMUL_P256_NEON_EXEC] BIGNUM_AMONTMUL_P256_NEON_CORRECT));; + (REWRITE_RULE[BIGNUM_MONTMUL_P256_NEON_EXEC;BIGNUM_MONTMUL_P256_NEON_CORE_EXEC] BIGNUM_AMONTMUL_P256_NEON_CORRECT));; diff --git a/arm/proofs/bignum_montmul_p384.ml b/arm/proofs/bignum_montmul_p384.ml index ad9dcb94a..83d4d888a 100644 --- a/arm/proofs/bignum_montmul_p384.ml +++ b/arm/proofs/bignum_montmul_p384.ml @@ -401,6 +401,15 @@ let bignum_montmul_p384_mc = let BIGNUM_MONTMUL_P384_EXEC = ARM_MK_EXEC_RULE bignum_montmul_p384_mc;; +(* bignum_montmul_p384_mc without ret. *) +let bignum_montmul_p384_core_mc_def, + bignum_montmul_p384_core_mc, + BIGNUM_MONTMUL_P384_CORE_EXEC = + mk_sublist_of_mc "bignum_montmul_p384_core_mc" + bignum_montmul_p384_mc + (`12`,`LENGTH bignum_montmul_p384_mc - 28`) + BIGNUM_MONTMUL_P384_EXEC;; + (* ------------------------------------------------------------------------- *) (* Proof. *) (* ------------------------------------------------------------------------- *) @@ -657,16 +666,16 @@ let montred_subst_tac execth regs n = DISCH_THEN(SUBST_ALL_TAC o MATCH_MP (REAL_ARITH `a:real = b + c ==> b = a - c`));; -let BIGNUM_MONTMUL_P384_CORRECT = time prove +let BIGNUM_MONTMUL_P384_CORE_CORRECT = time prove (`!z x y a b pc. - nonoverlapping (word pc,0x600) (z,8 * 6) + nonoverlapping (word pc,LENGTH bignum_montmul_p384_core_mc) (z,8 * 6) ==> ensures arm - (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p384_mc /\ - read PC s = word(pc + 0xc) /\ + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p384_core_mc /\ + read PC s = word pc /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,6) s = a /\ bignum_from_memory (y,6) s = b) - (\s. read PC s = word (pc + 0x5f0) /\ + (\s. read PC s = word (pc + LENGTH bignum_montmul_p384_core_mc) /\ (a * b <= 2 EXP 384 * p_384 ==> bignum_from_memory (z,6) s = (inverse_mod p_384 (2 EXP 384) * a * b) MOD p_384)) @@ -677,13 +686,14 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove MAYCHANGE SOME_FLAGS)`, MAP_EVERY X_GEN_TAC [`z:int64`; `x:int64`; `y:int64`; `a:num`; `b:num`; `pc:num`] THEN - REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES; + fst (CONJ_PAIR BIGNUM_MONTMUL_P384_CORE_EXEC)] THEN DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN (*** Globalize the a * b <= 2 EXP 384 * p_384 assumption ***) ASM_CASES_TAC `a * b <= 2 EXP 384 * p_384` THENL - [ASM_REWRITE_TAC[]; ARM_SIM_TAC BIGNUM_MONTMUL_P384_EXEC (1--377)] THEN + [ASM_REWRITE_TAC[]; ARM_SIM_TAC BIGNUM_MONTMUL_P384_CORE_EXEC (1--377)] THEN ENSURES_INIT_TAC "s0" THEN BIGNUM_DIGITIZE_TAC "x_" `bignum_from_memory (x,6) s0` THEN BIGNUM_DIGITIZE_TAC "y_" `bignum_from_memory (y,6) s0` THEN @@ -691,7 +701,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** First ADK block multiplying lower halves. ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC ([7;8;9] @ (13--23) @ [29] @ (35--39) @ [45] @ (51--54) @ [60] @ (66--68)) (1--68) THEN SUBGOAL_THEN @@ -723,7 +733,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** the three Montgomery steps on the low half ***) - montred_tac BIGNUM_MONTMUL_P384_EXEC + montred_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X15;X1;X20;X19;X17;X16;X15; X21;X22;X23]` 68 THEN REPLICATE_TAC 2 (FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC (LAND_CONV o RAND_CONV o LAND_CONV o RAND_CONV) @@ -731,12 +741,12 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove DISCH_THEN(ASSUME_TAC o MATCH_MP (REAL_ARITH `a:real = b + c ==> a - c = b`)) THEN - montred_subst_tac BIGNUM_MONTMUL_P384_EXEC + montred_subst_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X16;X15;X1;X20;X19;X17;X16; X21;X22;X23]` 83 THEN - montred_subst_tac BIGNUM_MONTMUL_P384_EXEC + montred_subst_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X17;X16;X15;X1;X20;X19;X17; X21;X22;X23]` 98 THEN - ARM_STEPS_TAC BIGNUM_MONTMUL_P384_EXEC (114--116) THEN + ARM_STEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC (114--116) THEN SUBGOAL_THEN `2 EXP 192 * bignum_of_wordlist @@ -758,7 +768,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** Second ADK block multiplying upper halves. ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC ([117;118;119] @ (123--133) @ [139] @ (145--149) @ [155] @ (161--164) @ [170] @ (176--178)) (117--178) THEN @@ -792,7 +802,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** First absolute difference computation ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC [179;180;181;185;187;189] (179--189) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; WORD_RULE `word_sub (word 0) x = word_neg x`]) THEN @@ -831,7 +841,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** Second absolute difference computation ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC [190;191;192;196;198;200] (190--200) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; WORD_RULE `word_sub (word 0) x = word_neg x`]) THEN @@ -870,7 +880,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** Collective sign-magnitude representation of middle product ***) - ARM_STEPS_TAC BIGNUM_MONTMUL_P384_EXEC [201] THEN + ARM_STEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC [201] THEN RULE_ASSUM_TAC(REWRITE_RULE[WORD_XOR_MASKS]) THEN ABBREV_TAC `msgn <=> ~(bignum_of_wordlist[x_3;x_4;x_5] < @@ -903,7 +913,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** the H + L' addition (a result that we then use twice) ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC (202--214) (202--214) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC (202--214) (202--214) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES]) THEN SUBGOAL_THEN `bignum_of_wordlist @@ -921,7 +931,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** Third and final ADK block computing the mid-product ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC ([215;216;217] @ (221--231) @ [237] @ (243--247) @ [253] @ (259--262) @ [268] @ (274--276)) (215--276) THEN @@ -956,7 +966,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** Big net accumulation computation absorbing cases over sign ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC ([282;284;286;288;290] @ (292--303)) (277--303) THEN SUBGOAL_THEN `2 EXP 192 * @@ -1004,7 +1014,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** Last three Montgomery steps to get the pre-reduced result ***) - montred_tac BIGNUM_MONTMUL_P384_EXEC + montred_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X15;X1;X20;X19;X17;X16;X15; X21;X22;X23]` 303 THEN FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC (LAND_CONV o RAND_CONV o LAND_CONV o RAND_CONV) @@ -1012,13 +1022,13 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove DISCH_THEN(ASSUME_TAC o MATCH_MP (REAL_ARITH `a:real = b + c ==> a - c = b`)) THEN - montred_subst_tac BIGNUM_MONTMUL_P384_EXEC + montred_subst_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X16;X15;X1;X20;X19;X17;X16; X21;X22;X23]` 318 THEN - montred_subst_tac BIGNUM_MONTMUL_P384_EXEC + montred_subst_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X17;X16;X15;X1;X20;X19;X17; X21;X22;X23]` 333 THEN - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC (349--352) (349--352) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC (349--352) (349--352) THEN ABBREV_TAC `t = bignum_of_wordlist [sum_s343; sum_s344; sum_s345; sum_s349; sum_s350; sum_s351; sum_s352]` THEN @@ -1089,7 +1099,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove MP_TAC(SPEC `t:num` p384shortredlemma) THEN ASM_REWRITE_TAC[] THEN CONV_TAC(LAND_CONV let_CONV) THEN STRIP_TAC THEN - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC (357--362) (353--362) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC (357--362) (353--362) THEN SUBGOAL_THEN `&2 pow 384 * (&(bitval carry_s362) - &1) + &(bignum_of_wordlist @@ -1117,7 +1127,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove BOUNDER_TAC[]; ALL_TAC] THEN - ARM_STEPS_TAC BIGNUM_MONTMUL_P384_EXEC [363] THEN + ARM_STEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC [363] THEN RULE_ASSUM_TAC(REWRITE_RULE [GSYM NOT_LT; COND_SWAP; GSYM WORD_MASK; SYM(WORD_REDUCE_CONV `word_not(word 0):int64`)]) THEN @@ -1125,7 +1135,7 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove (*** The final corrective masked addition ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC [366;368;371;372;373;374] (364--377) THEN ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN ASM_REWRITE_TAC[] THEN @@ -1155,6 +1165,29 @@ let BIGNUM_MONTMUL_P384_CORRECT = time prove CONV_TAC WORD_REDUCE_CONV THEN DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC);; +let BIGNUM_MONTMUL_P384_CORRECT = time prove( + `!z x y a b pc. + nonoverlapping (word pc,LENGTH bignum_montmul_p384_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p384_mc /\ + read PC s = word (pc+12) /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,6) s = a /\ + bignum_from_memory (y,6) s = b) + (\s. read PC s = word (pc + (12+LENGTH bignum_montmul_p384_core_mc)) /\ + (a * b <= 2 EXP 384 * p_384 + ==> bignum_from_memory (z,6) s = + (inverse_mod p_384 (2 EXP 384) * a * b) MOD p_384)) + (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; + X10; X11; X12; X13; X14; X15; X16; X17; X19; + X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)] ,, + MAYCHANGE SOME_FLAGS)`, + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_MONTMUL_P384_CORE_CORRECT + bignum_montmul_p384_core_mc_def + [BIGNUM_MONTMUL_P384_CORE_EXEC;BIGNUM_MONTMUL_P384_EXEC]);; + let BIGNUM_MONTMUL_P384_SUBROUTINE_CORRECT = time prove (`!z x y a b pc stackpointer returnaddress. aligned 16 stackpointer /\ @@ -1177,7 +1210,9 @@ let BIGNUM_MONTMUL_P384_SUBROUTINE_CORRECT = time prove MAYCHANGE [memory :> bytes(z,8 * 6); memory :> bytes(word_sub stackpointer (word 48),48)])`, ARM_ADD_RETURN_STACK_TAC - BIGNUM_MONTMUL_P384_EXEC BIGNUM_MONTMUL_P384_CORRECT + BIGNUM_MONTMUL_P384_EXEC + ((CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV) o + REWRITE_RULE [BIGNUM_MONTMUL_P384_EXEC;BIGNUM_MONTMUL_P384_CORE_EXEC]) BIGNUM_MONTMUL_P384_CORRECT) `[X19;X20;X21;X22;X23;X24]` 48);; (* ------------------------------------------------------------------------- *) @@ -1194,16 +1229,16 @@ let p384genshortredlemma = prove n < q * p_384 + p_384`, CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN REWRITE_TAC[p_384] THEN ARITH_TAC);; -let BIGNUM_AMONTMUL_P384_CORRECT = time prove +let BIGNUM_AMONTMUL_P384_CORE_CORRECT = time prove (`!z x y a b pc. - nonoverlapping (word pc,0x600) (z,8 * 6) + nonoverlapping (word pc,LENGTH bignum_montmul_p384_core_mc) (z,8 * 6) ==> ensures arm - (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p384_mc /\ - read PC s = word(pc + 0xc) /\ + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p384_core_mc /\ + read PC s = word(pc) /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,6) s = a /\ bignum_from_memory (y,6) s = b) - (\s. read PC s = word (pc + 0x5f0) /\ + (\s. read PC s = word (pc + LENGTH bignum_montmul_p384_core_mc) /\ (bignum_from_memory (z,6) s == inverse_mod p_384 (2 EXP 384) * a * b) (mod p_384)) (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; @@ -1213,7 +1248,8 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove MAYCHANGE SOME_FLAGS)`, MAP_EVERY X_GEN_TAC [`z:int64`; `x:int64`; `y:int64`; `a:num`; `b:num`; `pc:num`] THEN - REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES; + fst (CONJ_PAIR BIGNUM_MONTMUL_P384_CORE_EXEC)] THEN DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN ENSURES_INIT_TAC "s0" THEN BIGNUM_DIGITIZE_TAC "x_" `bignum_from_memory (x,6) s0` THEN @@ -1222,7 +1258,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** First ADK block multiplying lower halves. ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC ([7;8;9] @ (13--23) @ [29] @ (35--39) @ [45] @ (51--54) @ [60] @ (66--68)) (1--68) THEN SUBGOAL_THEN @@ -1254,7 +1290,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** the three Montgomery steps on the low half ***) - montred_tac BIGNUM_MONTMUL_P384_EXEC + montred_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X15;X1;X20;X19;X17;X16;X15; X21;X22;X23]` 68 THEN REPLICATE_TAC 2 (FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC (LAND_CONV o RAND_CONV o LAND_CONV o RAND_CONV) @@ -1262,12 +1298,12 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove DISCH_THEN(ASSUME_TAC o MATCH_MP (REAL_ARITH `a:real = b + c ==> a - c = b`)) THEN - montred_subst_tac BIGNUM_MONTMUL_P384_EXEC + montred_subst_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X16;X15;X1;X20;X19;X17;X16; X21;X22;X23]` 83 THEN - montred_subst_tac BIGNUM_MONTMUL_P384_EXEC + montred_subst_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X17;X16;X15;X1;X20;X19;X17; X21;X22;X23]` 98 THEN - ARM_STEPS_TAC BIGNUM_MONTMUL_P384_EXEC (114--116) THEN + ARM_STEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC (114--116) THEN SUBGOAL_THEN `2 EXP 192 * bignum_of_wordlist @@ -1289,7 +1325,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** Second ADK block multiplying upper halves. ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC ([117;118;119] @ (123--133) @ [139] @ (145--149) @ [155] @ (161--164) @ [170] @ (176--178)) (117--178) THEN @@ -1323,7 +1359,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** First absolute difference computation ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC [179;180;181;185;187;189] (179--189) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; WORD_RULE `word_sub (word 0) x = word_neg x`]) THEN @@ -1362,7 +1398,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** Second absolute difference computation ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC [190;191;192;196;198;200] (190--200) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; WORD_RULE `word_sub (word 0) x = word_neg x`]) THEN @@ -1401,7 +1437,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** Collective sign-magnitude representation of middle product ***) - ARM_STEPS_TAC BIGNUM_MONTMUL_P384_EXEC [201] THEN + ARM_STEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC [201] THEN RULE_ASSUM_TAC(REWRITE_RULE[WORD_XOR_MASKS]) THEN ABBREV_TAC `msgn <=> ~(bignum_of_wordlist[x_3;x_4;x_5] < @@ -1434,7 +1470,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** the H + L' addition (a result that we then use twice) ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC (202--214) (202--214) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC (202--214) (202--214) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES]) THEN SUBGOAL_THEN `bignum_of_wordlist @@ -1452,7 +1488,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** Third and final ADK block computing the mid-product ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC ([215;216;217] @ (221--231) @ [237] @ (243--247) @ [253] @ (259--262) @ [268] @ (274--276)) (215--276) THEN @@ -1487,7 +1523,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** Big net accumulation computation absorbing cases over sign ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC ([282;284;286;288;290] @ (292--303)) (277--303) THEN SUBGOAL_THEN `2 EXP 192 * @@ -1535,7 +1571,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** Last three Montgomery steps to get the pre-reduced result ***) - montred_tac BIGNUM_MONTMUL_P384_EXEC + montred_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X15;X1;X20;X19;X17;X16;X15; X21;X22;X23]` 303 THEN FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC (LAND_CONV o RAND_CONV o LAND_CONV o RAND_CONV) @@ -1543,13 +1579,13 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove DISCH_THEN(ASSUME_TAC o MATCH_MP (REAL_ARITH `a:real = b + c ==> a - c = b`)) THEN - montred_subst_tac BIGNUM_MONTMUL_P384_EXEC + montred_subst_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X16;X15;X1;X20;X19;X17;X16; X21;X22;X23]` 318 THEN - montred_subst_tac BIGNUM_MONTMUL_P384_EXEC + montred_subst_tac BIGNUM_MONTMUL_P384_CORE_EXEC `[X17;X16;X15;X1;X20;X19;X17; X21;X22;X23]` 333 THEN - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC (349--352) (349--352) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC (349--352) (349--352) THEN ABBREV_TAC `t = bignum_of_wordlist [sum_s343; sum_s344; sum_s345; sum_s349; sum_s350; sum_s351; sum_s352]` THEN @@ -1617,7 +1653,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove MP_TAC(SPEC `t:num` p384genshortredlemma) THEN ASM_REWRITE_TAC[] THEN CONV_TAC(LAND_CONV let_CONV) THEN STRIP_TAC THEN - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC (357--362) (353--362) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC (357--362) (353--362) THEN SUBGOAL_THEN `&2 pow 384 * (&(bitval carry_s362) - &1) + &(bignum_of_wordlist @@ -1645,7 +1681,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove BOUNDER_TAC[]; ALL_TAC] THEN - ARM_STEPS_TAC BIGNUM_MONTMUL_P384_EXEC [363] THEN + ARM_STEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC [363] THEN RULE_ASSUM_TAC(REWRITE_RULE [GSYM NOT_LT; COND_SWAP; GSYM WORD_MASK; SYM(WORD_REDUCE_CONV `word_not(word 0):int64`)]) THEN @@ -1653,7 +1689,7 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove (*** The final corrective masked addition ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC [366;368;371;372;373;374] (364--377) THEN ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN MATCH_MP_TAC(MESON[CONG; MOD_MOD_REFL] @@ -1684,6 +1720,28 @@ let BIGNUM_AMONTMUL_P384_CORRECT = time prove CONV_TAC WORD_REDUCE_CONV THEN DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC);; +let BIGNUM_AMONTMUL_P384_CORRECT = time prove + (`!z x y a b pc. + nonoverlapping (word pc,LENGTH bignum_montmul_p384_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p384_mc /\ + read PC s = word(pc + 12) /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,6) s = a /\ + bignum_from_memory (y,6) s = b) + (\s. read PC s = word (pc + (12 + LENGTH bignum_montmul_p384_core_mc)) /\ + (bignum_from_memory (z,6) s == + inverse_mod p_384 (2 EXP 384) * a * b) (mod p_384)) + (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; + X10; X11; X12; X13; X14; X15; X16; X17; X19; + X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)] ,, + MAYCHANGE SOME_FLAGS)`, + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_AMONTMUL_P384_CORE_CORRECT + bignum_montmul_p384_core_mc_def + [BIGNUM_MONTMUL_P384_EXEC;BIGNUM_MONTMUL_P384_CORE_EXEC]);; + let BIGNUM_AMONTMUL_P384_SUBROUTINE_CORRECT = time prove (`!z x y a b pc stackpointer returnaddress. aligned 16 stackpointer /\ @@ -1705,5 +1763,8 @@ let BIGNUM_AMONTMUL_P384_SUBROUTINE_CORRECT = time prove MAYCHANGE [memory :> bytes(z,8 * 6); memory :> bytes(word_sub stackpointer (word 48),48)])`, ARM_ADD_RETURN_STACK_TAC - BIGNUM_MONTMUL_P384_EXEC BIGNUM_AMONTMUL_P384_CORRECT + BIGNUM_MONTMUL_P384_EXEC + ((CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV) o + REWRITE_RULE[BIGNUM_MONTMUL_P384_EXEC; + BIGNUM_MONTMUL_P384_CORE_EXEC]) BIGNUM_AMONTMUL_P384_CORRECT) `[X19;X20;X21;X22;X23;X24]` 48);; diff --git a/arm/proofs/bignum_montmul_p384_neon.ml b/arm/proofs/bignum_montmul_p384_neon.ml new file mode 100644 index 000000000..1ac8486d5 --- /dev/null +++ b/arm/proofs/bignum_montmul_p384_neon.ml @@ -0,0 +1,1070 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(****************************************************************************** + The first program equivalence between the 'core' part of source program and + its SIMD-vectorized but not instruction-unscheduled program +******************************************************************************) + +needs "arm/proofs/bignum_montmul_p384.ml";; +needs "arm/proofs/equiv.ml";; +needs "arm/proofs/neon_helper.ml";; + +(* This is the intermediate program that is equivalent to both + bignum_montmul_p384 and bignum_montmul_p384_neon. This is a vectorized + version of bignum_montmul_p384 but instructions are unscheduled. *) + +let bignum_montmul_p384_interm1_ops:int list = [ + 0xa9bf53f3; (* stp x19, x20, [sp, #-16]! *) + 0xa9bf5bf5; (* stp x21, x22, [sp, #-16]! *) + 0xa9bf63f7; (* stp x23, x24, [sp, #-16]! *) + 0xa9405423; (* ldp x3, x21, [x1] *) + 0x3dc0003e; (* ldr q30, [x1] *) + 0xa9416028; (* ldp x8, x24, [x1, #16] *) + 0xa9422825; (* ldp x5, x10, [x1, #32] *) + 0xa9405c4d; (* ldp x13, x23, [x2] *) + 0x3dc00053; (* ldr q19, [x2] *) + 0xa9413846; (* ldp x6, x14, [x2, #16] *) + 0xa942444f; (* ldp x15, x17, [x2, #32] *) + 0x3dc00821; (* ldr q1, [x1, #32] *) + 0x3dc0085c; (* ldr q28, [x2, #32] *) + 0x4e9e1a65; (* uzp1 v5.4s, v19.4s, v30.4s *) + 0x4ea00a73; (* rev64 v19.4s, v19.4s *) + 0x4e9e1bc0; (* uzp1 v0.4s, v30.4s, v30.4s *) + 0x4ebe9e75; (* mul v21.4s, v19.4s, v30.4s *) + 0x6ea02ab3; (* uaddlp v19.2d, v21.4s *) + 0x4f605673; (* shl v19.2d, v19.2d, #32 *) + 0x2ea58013; (* umlal v19.2d, v0.2s, v5.2s *) + 0x4e083e6c; (* mov x12, v19.d[0] *) + 0x4e183e70; (* mov x16, v19.d[1] *) + 0x9b067d14; (* mul x20, x8, x6 *) + 0x9bcd7c64; (* umulh x4, x3, x13 *) + 0x9bd77ea1; (* umulh x1, x21, x23 *) + 0x9bc67d02; (* umulh x2, x8, x6 *) + 0xab100084; (* adds x4, x4, x16 *) + 0xba140033; (* adcs x19, x1, x20 *) + 0x9a1f0054; (* adc x20, x2, xzr *) + 0xab0c008b; (* adds x11, x4, x12 *) + 0xba040270; (* adcs x16, x19, x4 *) + 0xba130281; (* adcs x1, x20, x19 *) + 0x9a1f0282; (* adc x2, x20, xzr *) + 0xab0c0207; (* adds x7, x16, x12 *) + 0xba040024; (* adcs x4, x1, x4 *) + 0xba130049; (* adcs x9, x2, x19 *) + 0x9a1f0293; (* adc x19, x20, xzr *) + 0xeb150062; (* subs x2, x3, x21 *) + 0xda822454; (* cneg x20, x2, cc // cc = lo, ul, last *) + 0xda9f23f0; (* csetm x16, cc // cc = lo, ul, last *) + 0xeb0d02e2; (* subs x2, x23, x13 *) + 0xda822442; (* cneg x2, x2, cc // cc = lo, ul, last *) + 0x9b027e81; (* mul x1, x20, x2 *) + 0x9bc27e82; (* umulh x2, x20, x2 *) + 0xda902210; (* cinv x16, x16, cc // cc = lo, ul, last *) + 0xca100021; (* eor x1, x1, x16 *) + 0xca100042; (* eor x2, x2, x16 *) + 0xb100061f; (* cmn x16, #0x1 *) + 0xba01016b; (* adcs x11, x11, x1 *) + 0xba0200e7; (* adcs x7, x7, x2 *) + 0xba100084; (* adcs x4, x4, x16 *) + 0xba100129; (* adcs x9, x9, x16 *) + 0x9a100273; (* adc x19, x19, x16 *) + 0xeb080062; (* subs x2, x3, x8 *) + 0xda822454; (* cneg x20, x2, cc // cc = lo, ul, last *) + 0xda9f23f0; (* csetm x16, cc // cc = lo, ul, last *) + 0xeb0d00c2; (* subs x2, x6, x13 *) + 0xda822442; (* cneg x2, x2, cc // cc = lo, ul, last *) + 0x9b027e81; (* mul x1, x20, x2 *) + 0x9bc27e82; (* umulh x2, x20, x2 *) + 0xda902210; (* cinv x16, x16, cc // cc = lo, ul, last *) + 0xca100021; (* eor x1, x1, x16 *) + 0xca100042; (* eor x2, x2, x16 *) + 0xb100061f; (* cmn x16, #0x1 *) + 0xba0100e7; (* adcs x7, x7, x1 *) + 0xba020084; (* adcs x4, x4, x2 *) + 0xba100129; (* adcs x9, x9, x16 *) + 0x9a100273; (* adc x19, x19, x16 *) + 0xeb0802a2; (* subs x2, x21, x8 *) + 0xda822454; (* cneg x20, x2, cc // cc = lo, ul, last *) + 0xda9f23f0; (* csetm x16, cc // cc = lo, ul, last *) + 0xeb1700c2; (* subs x2, x6, x23 *) + 0xda822442; (* cneg x2, x2, cc // cc = lo, ul, last *) + 0x9b027e81; (* mul x1, x20, x2 *) + 0x9bc27e82; (* umulh x2, x20, x2 *) + 0xda902210; (* cinv x16, x16, cc // cc = lo, ul, last *) + 0xca100021; (* eor x1, x1, x16 *) + 0xca100042; (* eor x2, x2, x16 *) + 0xb100061f; (* cmn x16, #0x1 *) + 0xba010084; (* adcs x4, x4, x1 *) + 0xba020134; (* adcs x20, x9, x2 *) + 0x9a100270; (* adc x16, x19, x16 *) + 0xd3607d82; (* lsl x2, x12, #32 *) + 0x8b0c0053; (* add x19, x2, x12 *) + 0xd360fe62; (* lsr x2, x19, #32 *) + 0xeb130041; (* subs x1, x2, x19 *) + 0xda1f0262; (* sbc x2, x19, xzr *) + 0x93c18041; (* extr x1, x2, x1, #32 *) + 0xd360fc42; (* lsr x2, x2, #32 *) + 0xab13004c; (* adds x12, x2, x19 *) + 0x9a1f03e2; (* adc x2, xzr, xzr *) + 0xeb010161; (* subs x1, x11, x1 *) + 0xfa0c00e7; (* sbcs x7, x7, x12 *) + 0xfa020084; (* sbcs x4, x4, x2 *) + 0xfa1f0294; (* sbcs x20, x20, xzr *) + 0xfa1f0210; (* sbcs x16, x16, xzr *) + 0xda1f0269; (* sbc x9, x19, xzr *) + 0xd3607c22; (* lsl x2, x1, #32 *) + 0x8b010053; (* add x19, x2, x1 *) + 0xd360fe62; (* lsr x2, x19, #32 *) + 0xeb130041; (* subs x1, x2, x19 *) + 0xda1f0262; (* sbc x2, x19, xzr *) + 0x93c18041; (* extr x1, x2, x1, #32 *) + 0xd360fc42; (* lsr x2, x2, #32 *) + 0xab13004c; (* adds x12, x2, x19 *) + 0x9a1f03e2; (* adc x2, xzr, xzr *) + 0xeb0100e1; (* subs x1, x7, x1 *) + 0xfa0c0084; (* sbcs x4, x4, x12 *) + 0xfa020294; (* sbcs x20, x20, x2 *) + 0xfa1f0210; (* sbcs x16, x16, xzr *) + 0xfa1f0127; (* sbcs x7, x9, xzr *) + 0xda1f0269; (* sbc x9, x19, xzr *) + 0xd3607c22; (* lsl x2, x1, #32 *) + 0x8b010053; (* add x19, x2, x1 *) + 0xd360fe62; (* lsr x2, x19, #32 *) + 0xeb130041; (* subs x1, x2, x19 *) + 0xda1f0262; (* sbc x2, x19, xzr *) + 0x93c1804c; (* extr x12, x2, x1, #32 *) + 0xd360fc42; (* lsr x2, x2, #32 *) + 0xab130041; (* adds x1, x2, x19 *) + 0x9a1f03e2; (* adc x2, xzr, xzr *) + 0xeb0c0084; (* subs x4, x4, x12 *) + 0xfa010294; (* sbcs x20, x20, x1 *) + 0xfa020210; (* sbcs x16, x16, x2 *) + 0xfa1f00ec; (* sbcs x12, x7, xzr *) + 0xfa1f0121; (* sbcs x1, x9, xzr *) + 0xda1f0262; (* sbc x2, x19, xzr *) + 0xa9005004; (* stp x4, x20, [x0] *) + 0xa9013010; (* stp x16, x12, [x0, #16] *) + 0xa9020801; (* stp x1, x2, [x0, #32] *) + 0x9b0e7f16; (* mul x22, x24, x14 *) + 0x6f00e5ff; (* movi v31.2d, #0xffffffff *) + 0x4e9c5b90; (* uzp2 v16.4s, v28.4s, v28.4s *) + 0x0ea12826; (* xtn v6.2s, v1.2d *) + 0x0ea12b9e; (* xtn v30.2s, v28.2d *) + 0x4ea00b9c; (* rev64 v28.4s, v28.4s *) + 0x2ebec0c5; (* umull v5.2d, v6.2s, v30.2s *) + 0x2eb0c0c0; (* umull v0.2d, v6.2s, v16.2s *) + 0x4e815833; (* uzp2 v19.4s, v1.4s, v1.4s *) + 0x4ea19f94; (* mul v20.4s, v28.4s, v1.4s *) + 0x6f6014a0; (* usra v0.2d, v5.2d, #32 *) + 0x2eb0c261; (* umull v1.2d, v19.2s, v16.2s *) + 0x6ea02a98; (* uaddlp v24.2d, v20.4s *) + 0x4e3f1c05; (* and v5.16b, v0.16b, v31.16b *) + 0x2ebe8265; (* umlal v5.2d, v19.2s, v30.2s *) + 0x4f605713; (* shl v19.2d, v24.2d, #32 *) + 0x6f601401; (* usra v1.2d, v0.2d, #32 *) + 0x2ebe80d3; (* umlal v19.2d, v6.2s, v30.2s *) + 0x6f6014a1; (* usra v1.2d, v5.2d, #32 *) + 0x4e083e74; (* mov x20, v19.d[0] *) + 0x4e183e70; (* mov x16, v19.d[1] *) + 0x9bce7f0c; (* umulh x12, x24, x14 *) + 0x4e083c21; (* mov x1, v1.d[0] *) + 0x4e183c22; (* mov x2, v1.d[1] *) + 0xab140184; (* adds x4, x12, x20 *) + 0xba100034; (* adcs x20, x1, x16 *) + 0x9a1f0050; (* adc x16, x2, xzr *) + 0xab160087; (* adds x7, x4, x22 *) + 0xba04028c; (* adcs x12, x20, x4 *) + 0xba140201; (* adcs x1, x16, x20 *) + 0x9a1f0202; (* adc x2, x16, xzr *) + 0xab160189; (* adds x9, x12, x22 *) + 0xba040033; (* adcs x19, x1, x4 *) + 0xba140044; (* adcs x4, x2, x20 *) + 0x9a1f0214; (* adc x20, x16, xzr *) + 0xeb050302; (* subs x2, x24, x5 *) + 0xda822450; (* cneg x16, x2, cc // cc = lo, ul, last *) + 0xda9f23ec; (* csetm x12, cc // cc = lo, ul, last *) + 0xeb0e01e2; (* subs x2, x15, x14 *) + 0xda822442; (* cneg x2, x2, cc // cc = lo, ul, last *) + 0x9b027e01; (* mul x1, x16, x2 *) + 0x9bc27e02; (* umulh x2, x16, x2 *) + 0xda8c218c; (* cinv x12, x12, cc // cc = lo, ul, last *) + 0xca0c0021; (* eor x1, x1, x12 *) + 0xca0c0042; (* eor x2, x2, x12 *) + 0xb100059f; (* cmn x12, #0x1 *) + 0xba0100eb; (* adcs x11, x7, x1 *) + 0xba020129; (* adcs x9, x9, x2 *) + 0xba0c0273; (* adcs x19, x19, x12 *) + 0xba0c0084; (* adcs x4, x4, x12 *) + 0x9a0c0294; (* adc x20, x20, x12 *) + 0xeb0a0302; (* subs x2, x24, x10 *) + 0xda822450; (* cneg x16, x2, cc // cc = lo, ul, last *) + 0xda9f23ec; (* csetm x12, cc // cc = lo, ul, last *) + 0xeb0e0222; (* subs x2, x17, x14 *) + 0xda822442; (* cneg x2, x2, cc // cc = lo, ul, last *) + 0x9b027e01; (* mul x1, x16, x2 *) + 0x9bc27e02; (* umulh x2, x16, x2 *) + 0xda8c218c; (* cinv x12, x12, cc // cc = lo, ul, last *) + 0xca0c0021; (* eor x1, x1, x12 *) + 0xca0c0042; (* eor x2, x2, x12 *) + 0xb100059f; (* cmn x12, #0x1 *) + 0xba010127; (* adcs x7, x9, x1 *) + 0xba020273; (* adcs x19, x19, x2 *) + 0xba0c0084; (* adcs x4, x4, x12 *) + 0x9a0c0294; (* adc x20, x20, x12 *) + 0xeb0a00a2; (* subs x2, x5, x10 *) + 0xda822450; (* cneg x16, x2, cc // cc = lo, ul, last *) + 0xda9f23ec; (* csetm x12, cc // cc = lo, ul, last *) + 0xeb0f0222; (* subs x2, x17, x15 *) + 0xda822442; (* cneg x2, x2, cc // cc = lo, ul, last *) + 0x9b027e01; (* mul x1, x16, x2 *) + 0x9bc27e02; (* umulh x2, x16, x2 *) + 0xda8c2190; (* cinv x16, x12, cc // cc = lo, ul, last *) + 0xca100021; (* eor x1, x1, x16 *) + 0xca100042; (* eor x2, x2, x16 *) + 0xb100061f; (* cmn x16, #0x1 *) + 0xba010273; (* adcs x19, x19, x1 *) + 0xba02008c; (* adcs x12, x4, x2 *) + 0x9a100281; (* adc x1, x20, x16 *) + 0xeb030302; (* subs x2, x24, x3 *) + 0xfa1500b8; (* sbcs x24, x5, x21 *) + 0xfa080155; (* sbcs x21, x10, x8 *) + 0xda1f03e5; (* ngc x5, xzr *) + 0xb10004bf; (* cmn x5, #0x1 *) + 0xca050042; (* eor x2, x2, x5 *) + 0xba1f0044; (* adcs x4, x2, xzr *) + 0xca050302; (* eor x2, x24, x5 *) + 0xba1f0054; (* adcs x20, x2, xzr *) + 0xca0502a2; (* eor x2, x21, x5 *) + 0x9a1f0050; (* adc x16, x2, xzr *) + 0xeb0e01a2; (* subs x2, x13, x14 *) + 0xfa0f02f8; (* sbcs x24, x23, x15 *) + 0xfa1100c8; (* sbcs x8, x6, x17 *) + 0xda1f03f5; (* ngc x21, xzr *) + 0xb10006bf; (* cmn x21, #0x1 *) + 0xca150042; (* eor x2, x2, x21 *) + 0xba1f004f; (* adcs x15, x2, xzr *) + 0xca150302; (* eor x2, x24, x21 *) + 0xba1f004e; (* adcs x14, x2, xzr *) + 0xca150102; (* eor x2, x8, x21 *) + 0x9a1f0046; (* adc x6, x2, xzr *) + 0xca1500a9; (* eor x9, x5, x21 *) + 0xa9400815; (* ldp x21, x2, [x0] *) + 0xab1502ca; (* adds x10, x22, x21 *) + 0xba020165; (* adcs x5, x11, x2 *) + 0xa9410815; (* ldp x21, x2, [x0, #16] *) + 0xba1500f8; (* adcs x24, x7, x21 *) + 0xba020268; (* adcs x8, x19, x2 *) + 0xa9420815; (* ldp x21, x2, [x0, #32] *) + 0xba150195; (* adcs x21, x12, x21 *) + 0xba020022; (* adcs x2, x1, x2 *) + 0x9a1f03f3; (* adc x19, xzr, xzr *) + 0xa900140a; (* stp x10, x5, [x0] *) + 0xa9012018; (* stp x24, x8, [x0, #16] *) + 0xa9020815; (* stp x21, x2, [x0, #32] *) + 0x9b0f7c8c; (* mul x12, x4, x15 *) + 0x9b0e7e85; (* mul x5, x20, x14 *) + 0x9b067e18; (* mul x24, x16, x6 *) + 0x9bcf7c88; (* umulh x8, x4, x15 *) + 0x9bce7e95; (* umulh x21, x20, x14 *) + 0x9bc67e02; (* umulh x2, x16, x6 *) + 0xab05010a; (* adds x10, x8, x5 *) + 0xba1802a5; (* adcs x5, x21, x24 *) + 0x9a1f0058; (* adc x24, x2, xzr *) + 0xab0c0157; (* adds x23, x10, x12 *) + 0xba0a00a8; (* adcs x8, x5, x10 *) + 0xba050315; (* adcs x21, x24, x5 *) + 0x9a1f0302; (* adc x2, x24, xzr *) + 0xab0c010d; (* adds x13, x8, x12 *) + 0xba0a02a1; (* adcs x1, x21, x10 *) + 0xba05004a; (* adcs x10, x2, x5 *) + 0x9a1f0305; (* adc x5, x24, xzr *) + 0xeb140082; (* subs x2, x4, x20 *) + 0xda822458; (* cneg x24, x2, cc // cc = lo, ul, last *) + 0xda9f23e8; (* csetm x8, cc // cc = lo, ul, last *) + 0xeb0f01c2; (* subs x2, x14, x15 *) + 0xda822442; (* cneg x2, x2, cc // cc = lo, ul, last *) + 0x9b027f15; (* mul x21, x24, x2 *) + 0x9bc27f02; (* umulh x2, x24, x2 *) + 0xda882108; (* cinv x8, x8, cc // cc = lo, ul, last *) + 0xca0802b5; (* eor x21, x21, x8 *) + 0xca080042; (* eor x2, x2, x8 *) + 0xb100051f; (* cmn x8, #0x1 *) + 0xba1502f7; (* adcs x23, x23, x21 *) + 0xba0201ad; (* adcs x13, x13, x2 *) + 0xba080021; (* adcs x1, x1, x8 *) + 0xba08014a; (* adcs x10, x10, x8 *) + 0x9a0800a5; (* adc x5, x5, x8 *) + 0xeb100082; (* subs x2, x4, x16 *) + 0xda822458; (* cneg x24, x2, cc // cc = lo, ul, last *) + 0xda9f23e8; (* csetm x8, cc // cc = lo, ul, last *) + 0xeb0f00c2; (* subs x2, x6, x15 *) + 0xda822442; (* cneg x2, x2, cc // cc = lo, ul, last *) + 0x9b027f15; (* mul x21, x24, x2 *) + 0x9bc27f02; (* umulh x2, x24, x2 *) + 0xda882108; (* cinv x8, x8, cc // cc = lo, ul, last *) + 0xca0802b5; (* eor x21, x21, x8 *) + 0xca080042; (* eor x2, x2, x8 *) + 0xb100051f; (* cmn x8, #0x1 *) + 0xba1501a4; (* adcs x4, x13, x21 *) + 0xba02002d; (* adcs x13, x1, x2 *) + 0xba080141; (* adcs x1, x10, x8 *) + 0x9a0800aa; (* adc x10, x5, x8 *) + 0xeb100282; (* subs x2, x20, x16 *) + 0xda822458; (* cneg x24, x2, cc // cc = lo, ul, last *) + 0xda9f23e8; (* csetm x8, cc // cc = lo, ul, last *) + 0xeb0e00c2; (* subs x2, x6, x14 *) + 0xda822442; (* cneg x2, x2, cc // cc = lo, ul, last *) + 0x9b027f15; (* mul x21, x24, x2 *) + 0x9bc27f02; (* umulh x2, x24, x2 *) + 0xda882105; (* cinv x5, x8, cc // cc = lo, ul, last *) + 0xca0502b5; (* eor x21, x21, x5 *) + 0xca050042; (* eor x2, x2, x5 *) + 0xb10004bf; (* cmn x5, #0x1 *) + 0xba1501b8; (* adcs x24, x13, x21 *) + 0xba020028; (* adcs x8, x1, x2 *) + 0x9a050155; (* adc x21, x10, x5 *) + 0xa9404014; (* ldp x20, x16, [x0] *) + 0xa9413c11; (* ldp x17, x15, [x0, #16] *) + 0xa942180e; (* ldp x14, x6, [x0, #32] *) + 0xb100053f; (* cmn x9, #0x1 *) + 0xca090182; (* eor x2, x12, x9 *) + 0xba14004c; (* adcs x12, x2, x20 *) + 0xca0902e2; (* eor x2, x23, x9 *) + 0xba100057; (* adcs x23, x2, x16 *) + 0xca090082; (* eor x2, x4, x9 *) + 0xba11004d; (* adcs x13, x2, x17 *) + 0xca090302; (* eor x2, x24, x9 *) + 0xba0f004a; (* adcs x10, x2, x15 *) + 0xca090102; (* eor x2, x8, x9 *) + 0xba0e0045; (* adcs x5, x2, x14 *) + 0xca0902a2; (* eor x2, x21, x9 *) + 0xba060058; (* adcs x24, x2, x6 *) + 0xba130121; (* adcs x1, x9, x19 *) + 0xba1f0128; (* adcs x8, x9, xzr *) + 0xba1f0135; (* adcs x21, x9, xzr *) + 0x9a1f0122; (* adc x2, x9, xzr *) + 0xab14014a; (* adds x10, x10, x20 *) + 0xba1000a5; (* adcs x5, x5, x16 *) + 0xba110318; (* adcs x24, x24, x17 *) + 0xba0f0031; (* adcs x17, x1, x15 *) + 0xba0e010f; (* adcs x15, x8, x14 *) + 0xba0602ae; (* adcs x14, x21, x6 *) + 0x9a130046; (* adc x6, x2, x19 *) + 0xd3607d82; (* lsl x2, x12, #32 *) + 0x8b0c0041; (* add x1, x2, x12 *) + 0xd360fc22; (* lsr x2, x1, #32 *) + 0xeb010055; (* subs x21, x2, x1 *) + 0xda1f0022; (* sbc x2, x1, xzr *) + 0x93d58055; (* extr x21, x2, x21, #32 *) + 0xd360fc42; (* lsr x2, x2, #32 *) + 0xab010048; (* adds x8, x2, x1 *) + 0x9a1f03e2; (* adc x2, xzr, xzr *) + 0xeb1502f5; (* subs x21, x23, x21 *) + 0xfa0801b7; (* sbcs x23, x13, x8 *) + 0xfa02014a; (* sbcs x10, x10, x2 *) + 0xfa1f00a5; (* sbcs x5, x5, xzr *) + 0xfa1f0318; (* sbcs x24, x24, xzr *) + 0xda1f002d; (* sbc x13, x1, xzr *) + 0xd3607ea2; (* lsl x2, x21, #32 *) + 0x8b150041; (* add x1, x2, x21 *) + 0xd360fc22; (* lsr x2, x1, #32 *) + 0xeb010055; (* subs x21, x2, x1 *) + 0xda1f0022; (* sbc x2, x1, xzr *) + 0x93d58055; (* extr x21, x2, x21, #32 *) + 0xd360fc42; (* lsr x2, x2, #32 *) + 0xab010048; (* adds x8, x2, x1 *) + 0x9a1f03e2; (* adc x2, xzr, xzr *) + 0xeb1502f5; (* subs x21, x23, x21 *) + 0xfa08014a; (* sbcs x10, x10, x8 *) + 0xfa0200a5; (* sbcs x5, x5, x2 *) + 0xfa1f0318; (* sbcs x24, x24, xzr *) + 0xfa1f01b7; (* sbcs x23, x13, xzr *) + 0xda1f002d; (* sbc x13, x1, xzr *) + 0xd3607ea2; (* lsl x2, x21, #32 *) + 0x8b150041; (* add x1, x2, x21 *) + 0xd360fc22; (* lsr x2, x1, #32 *) + 0xeb010055; (* subs x21, x2, x1 *) + 0xda1f0022; (* sbc x2, x1, xzr *) + 0x93d58048; (* extr x8, x2, x21, #32 *) + 0xd360fc42; (* lsr x2, x2, #32 *) + 0xab010055; (* adds x21, x2, x1 *) + 0x9a1f03e2; (* adc x2, xzr, xzr *) + 0xeb08014a; (* subs x10, x10, x8 *) + 0xfa1500a5; (* sbcs x5, x5, x21 *) + 0xfa020318; (* sbcs x24, x24, x2 *) + 0xfa1f02e8; (* sbcs x8, x23, xzr *) + 0xfa1f01b5; (* sbcs x21, x13, xzr *) + 0xda1f0022; (* sbc x2, x1, xzr *) + 0xab080237; (* adds x23, x17, x8 *) + 0xba1501ed; (* adcs x13, x15, x21 *) + 0xba0201c1; (* adcs x1, x14, x2 *) + 0x9a1f00c2; (* adc x2, x6, xzr *) + 0x91000448; (* add x8, x2, #0x1 *) + 0xd3607d02; (* lsl x2, x8, #32 *) + 0xeb020115; (* subs x21, x8, x2 *) + 0xda1f0042; (* sbc x2, x2, xzr *) + 0xab15014a; (* adds x10, x10, x21 *) + 0xba0200a5; (* adcs x5, x5, x2 *) + 0xba080318; (* adcs x24, x24, x8 *) + 0xba1f02e8; (* adcs x8, x23, xzr *) + 0xba1f01b5; (* adcs x21, x13, xzr *) + 0xba1f002d; (* adcs x13, x1, xzr *) + 0xda9f23e1; (* csetm x1, cc // cc = lo, ul, last *) + 0xb2407fe2; (* mov x2, #0xffffffff // #4294967295 *) + 0x8a010042; (* and x2, x2, x1 *) + 0xab02014a; (* adds x10, x10, x2 *) + 0xca010042; (* eor x2, x2, x1 *) + 0xba0200a5; (* adcs x5, x5, x2 *) + 0x92800022; (* mov x2, #0xfffffffffffffffe // #-2 *) + 0x8a010042; (* and x2, x2, x1 *) + 0xba020318; (* adcs x24, x24, x2 *) + 0xba010108; (* adcs x8, x8, x1 *) + 0xba0102b5; (* adcs x21, x21, x1 *) + 0x9a0101a2; (* adc x2, x13, x1 *) + 0xa900140a; (* stp x10, x5, [x0] *) + 0xa9012018; (* stp x24, x8, [x0, #16] *) + 0xa9020815; (* stp x21, x2, [x0, #32] *) + 0xa8c163f7; (* ldp x23, x24, [sp], #16 *) + 0xa8c15bf5; (* ldp x21, x22, [sp], #16 *) + 0xa8c153f3; (* ldp x19, x20, [sp], #16 *) + 0xd65f03c0; (* ret *) +];; + +let bignum_montmul_p384_interm1_mc = + let charlist = List.concat_map + (fun op32 -> + [Char.chr (Int.logand op32 255); + Char.chr (Int.logand (Int.shift_right op32 8) 255); + Char.chr (Int.logand (Int.shift_right op32 16) 255); + Char.chr (Int.logand (Int.shift_right op32 24) 255)]) + bignum_montmul_p384_interm1_ops in + let byte_list = Bytes.init (List.length charlist) (fun i -> List.nth charlist i) in + define_word_list "bignum_montmul_p384_interm1_mc" (term_of_bytes byte_list);; + +let BIGNUM_MONTMUL_P384_INTERM1_EXEC = + ARM_MK_EXEC_RULE bignum_montmul_p384_interm1_mc;; + +let bignum_montmul_p384_interm1_core_mc_def, + bignum_montmul_p384_interm1_core_mc, + BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC = + mk_sublist_of_mc "bignum_montmul_p384_interm1_core_mc" + bignum_montmul_p384_interm1_mc + (`12`,`LENGTH bignum_montmul_p384_interm1_mc - 28`) + BIGNUM_MONTMUL_P384_INTERM1_EXEC;; + +let equiv_input_states = new_definition + `!s1 s1' x y z. + (equiv_input_states:(armstate#armstate)->int64->int64->int64->bool) (s1,s1') x y z <=> + (?a b. + C_ARGUMENTS [z; x; y] s1 /\ + C_ARGUMENTS [z; x; y] s1' /\ + bignum_from_memory (x,6) s1 = a /\ + bignum_from_memory (x,6) s1' = a /\ + bignum_from_memory (y,6) s1 = b /\ + bignum_from_memory (y,6) s1' = b)`;; + +let equiv_output_states = new_definition + `!s1 s1' z. + (equiv_output_states:(armstate#armstate)->int64->bool) (s1,s1') z <=> + (?a. + bignum_from_memory (z,6) s1 = a /\ + bignum_from_memory (z,6) s1' = a)`;; + +(* This diff is generated by tools/diff.py. *) +let actions = [ + ("equal", 0, 1, 0, 1); + ("insert", 1, 1, 1, 2); + ("equal", 1, 4, 2, 5); + ("insert", 4, 4, 5, 6); + ("equal", 4, 6, 6, 8); + ("replace", 6, 8, 8, 19); + ("equal", 8, 117, 19, 128); + ("replace", 117, 119, 128, 148); + ("equal", 119, 120, 148, 149); + ("replace", 120, 122, 149, 151); + ("equal", 122, 377, 151, 406); +];; + +let equiv_goal1 = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 6)) + [(word pc:int64,LENGTH bignum_montmul_p384_core_mc); + (word pc2:int64,LENGTH bignum_montmul_p384_interm1_core_mc)]` + equiv_input_states + equiv_output_states + bignum_montmul_p384_core_mc 0 + `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; + X10; X11; X12; X13; X14; X15; X16; X17; X19; + X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)] ,, + MAYCHANGE SOME_FLAGS` + bignum_montmul_p384_interm1_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)]`;; + + +let _org_extra_word_CONV = !extra_word_CONV;; +extra_word_CONV := + [GEN_REWRITE_CONV I [WORD_BITMANIP_SIMP_LEMMAS; WORD_MUL64_LO; WORD_MUL64_HI]] + @ (!extra_word_CONV);; + +let BIGNUM_MONTMUL_P384_CORE_EQUIV1 = time prove(equiv_goal1, + + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; + ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P384_CORE_EXEC; + BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC] THEN + REPEAT STRIP_TAC THEN + (** Initialize **) + EQUIV_INITIATE_TAC equiv_input_states THEN + REPEAT (FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN + (* necessary to run ldr qs *) + COMBINE_READ_BYTES64_PAIRS_TAC THEN + + (* Start *) + EQUIV_STEPS_TAC actions + BIGNUM_MONTMUL_P384_CORE_EXEC + BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC THEN + + REPEAT_N 2 ENSURES_FINAL_STATE'_TAC THEN + (* Prove remaining clauses from the postcondition *) + ASM_REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Outputs **) + ASM_REWRITE_TAC[equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; + BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,6) state`; + C_ARGUMENTS] THEN + REPEAT (HINT_EXISTS_REFL_TAC THEN ASM_REWRITE_TAC[]); + + (** SUBGOAL 2. Maychange left **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0':armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC; + + (** SUBGOAL 3. Maychange right **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0:armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC + ]);; + +extra_word_CONV := _org_extra_word_CONV;; + + +(****************************************************************************** + The second program equivalence between the core part of intermediate + program and fully optimized program +******************************************************************************) + +let bignum_montmul_p384_neon_mc = + define_from_elf "bignum_montmul_p384_neon_mc" + "arm/p384/bignum_montmul_p384_neon.o";; + +let BIGNUM_MONTMUL_P384_NEON_EXEC = + ARM_MK_EXEC_RULE bignum_montmul_p384_neon_mc;; + +let bignum_montmul_p384_neon_core_mc_def, + bignum_montmul_p384_neon_core_mc, + BIGNUM_MONTMUL_P384_NEON_CORE_EXEC = + mk_sublist_of_mc "bignum_montmul_p384_neon_core_mc" + bignum_montmul_p384_neon_mc + (`12`,`LENGTH bignum_montmul_p384_neon_mc - 28`) + BIGNUM_MONTMUL_P384_NEON_EXEC;; + + +let equiv_goal2 = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 6)) + [(word pc:int64,LENGTH bignum_montmul_p384_interm1_core_mc); + (word pc2:int64,LENGTH bignum_montmul_p384_neon_core_mc)]` + equiv_input_states + equiv_output_states + bignum_montmul_p384_interm1_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)]` + bignum_montmul_p384_neon_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)]`;; + + +(* Line numbers from the fully optimized prog. to the intermediate prog. + The script that prints this map is being privately maintained by aqjune-aws. + This map can be also printed from the instruction map of SLOTHY's output, but + aqjune-aws does not have the converter. *) + +let inst_map = [ + 2;6;5;1;12;11;21;14;13;10;3;35;9;129;37;22;133;130;36;38;131;132;39;7;137;15;42;40;136;135;16;17;20;139;51;134;52;23;19;43;18;53;138;24;25;41;26;144;27;141;28;140;29;142;30;31;143;32;33;44;34;54;55;58;45;56;46;47;48;57;49;50;66;67;59;68;69;70;145;71;73;61;146;62;60;63;64;80;72;74;8;81;65;4;82;75;83;84;76;77;85;86;78;79;87;149;88;89;90;91;95;96;92;128;93;97;94;98;99;100;101;102;103;104;105;106;110;111;107;112;108;109;113;114;115;116;117;118;119;147;120;121;125;122;123;126;150;124;163;148;164;165;166;151;170;167;127;168;152;153;154;155;169;156;171;157;158;159;160;161;162;173;172;174;175;176;177;178;179;180;181;182;186;183;208;209;184;210;211;194;187;196;195;197;201;198;219;220;215;199;221;213;222;185;212;202;214;216;217;218;228;200;223;224;226;225;227;229;189;188;190;247;191;192;203;231;193;204;205;234;246;206;207;237;232;245;233;230;235;241;236;244;238;242;239;240;277;249;243;278;279;248;292;310;293;294;280;281;284;261;283;262;263;264;268;282;265;295;296;299;250;266;251;252;253;254;267;255;256;257;258;259;298;269;260;271;272;270;312;273;285;274;297;301;275;276;287;288;398;286;289;300;290;291;302;306;307;303;316;304;305;309;311;308;314;333;313;315;320;317;318;319;321;322;323;334;335;324;325;336;337;326;327;338;339;328;329;330;331;393;332;340;341;342;343;348;349;344;345;350;346;347;351;352;353;354;355;356;357;358;363;359;364;360;365;361;362;366;367;369;368;370;371;372;373;374;375;376;377;378;379;380;381;382;383;384;385;386;387;388;389;390;391;392;399;394;395;396;397;400;404;401;402;405;403;406 +];; + +(* (state number, (equation, fresh var)) *) +let state_to_abbrevs: (int * thm) list ref = ref [];; + +let BIGNUM_MONTMUL_P384_CORE_EQUIV2 = time prove( + equiv_goal2, + + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; + ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC; + BIGNUM_MONTMUL_P384_NEON_CORE_EXEC] THEN + REPEAT STRIP_TAC THEN + (** Initialize **) + EQUIV_INITIATE_TAC equiv_input_states THEN + REPEAT (FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN + (* necessary to run ldr qs *) + COMBINE_READ_BYTES64_PAIRS_TAC THEN + + (* Left *) + ARM_STEPS'_AND_ABBREV_TAC BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC + (1--(List.length inst_map)) state_to_abbrevs THEN + + (* Right *) + ARM_STEPS'_AND_REWRITE_TAC BIGNUM_MONTMUL_P384_NEON_CORE_EXEC + (1--(List.length inst_map)) inst_map state_to_abbrevs THEN + + REPEAT_N 2 ENSURES_FINAL_STATE'_TAC THEN + (* Prove remaining clauses from the postcondition *) + ASM_REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Outputs **) + ASM_REWRITE_TAC[equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; + BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,6) state`; + C_ARGUMENTS] THEN + REPEAT (HINT_EXISTS_REFL_TAC THEN ASM_REWRITE_TAC[]); + + (** SUBGOAL 2. Maychange left **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0':armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC; + + (** SUBGOAL 3. Maychange right **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0:armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC + ]);; + + +(****************************************************************************** + Use transitivity of two program equivalences to prove end-to-end + correctness +******************************************************************************) + +let equiv_goal = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 6)) + [(word pc:int64,LENGTH bignum_montmul_p384_core_mc); + (word pc2:int64,LENGTH bignum_montmul_p384_neon_core_mc)]` + equiv_input_states + equiv_output_states + bignum_montmul_p384_core_mc 0 + `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; + X10; X11; X12; X13; X14; X15; X16; X17; X19; + X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)] ,, + MAYCHANGE SOME_FLAGS` + bignum_montmul_p384_neon_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)]`;; + +let equiv_output_states_TRANS = prove( + `!s s2 s' + z. equiv_output_states (s,s') z /\ equiv_output_states (s',s2) z + ==> equiv_output_states (s,s2) z`, + MESON_TAC[equiv_output_states]);; + +let BIGNUM_MONTMUL_P384_CORE_EQUIV = time prove(equiv_goal, + + REPEAT STRIP_TAC THEN + SUBGOAL_THEN + `?pc3. + ALL (nonoverlapping (z:int64,8 * 6)) + [(word pc:int64,LENGTH bignum_montmul_p384_core_mc); + (word pc3:int64,LENGTH bignum_montmul_p384_interm1_core_mc)] /\ + ALL (nonoverlapping (z:int64,8 * 6)) + [(word pc3:int64,LENGTH bignum_montmul_p384_interm1_core_mc); + (word pc2:int64,LENGTH bignum_montmul_p384_neon_core_mc)] /\ + // Input buffers and the intermediate program don't alias + ALL (nonoverlapping + (word pc3:int64, LENGTH bignum_montmul_p384_interm1_core_mc)) + [x,8 * 6; y,8 * 6] /\ + 4 divides val (word pc3:int64)` + MP_TAC THENL [ + FIRST_X_ASSUM MP_TAC THEN + ASM_REWRITE_TAC + [ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC; + BIGNUM_MONTMUL_P384_NEON_CORE_EXEC; + BIGNUM_MONTMUL_P384_CORE_EXEC;GSYM CONJ_ASSOC] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN POP_ASSUM_LIST (K ALL_TAC) THEN + FIND_HOLE_TAC; + + ALL_TAC + ] THEN + DISCH_THEN (CHOOSE_THEN (DESTRUCT_TAC "h1 h2 h3 h4")) THEN + + + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTMUL_P384_CORE_EQUIV1 th))) THEN + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTMUL_P384_CORE_EQUIV2 th))) THEN + FIRST_X_ASSUM (fun c1 -> + FIRST_X_ASSUM (fun c2 -> + MP_TAC (REWRITE_RULE [] (MATCH_MP ENSURES2_CONJ2 (CONJ c1 c2))) + )) THEN + + (* break 'ALL nonoverlapping' in assumptions *) + RULE_ASSUM_TAC (REWRITE_RULE[ + ALLPAIRS;ALL; + BIGNUM_MONTMUL_P384_CORE_EXEC; + BIGNUM_MONTMUL_P384_NEON_CORE_EXEC; + BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC; + NONOVERLAPPING_CLAUSES]) THEN + REPEAT SPLIT_FIRST_CONJ_ASSUM_TAC THEN + + MATCH_MP_TAC ENSURES2_WEAKEN THEN + REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + REPEAT STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[TAUT `(p /\ q /\ r) /\ p /\ q /\ r' <=> p /\ q /\ r /\ r'`] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc3,LENGTH bignum_montmul_p384_interm1_core_mc)) + bignum_montmul_p384_interm1_core_mc + (write PC (word pc3) s')` THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC THENL [ + UNDISCH_TAC `equiv_input_states (s,s') x y z` THEN + REWRITE_TAC[equiv_input_states;C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES;BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + MAP_EVERY EXISTS_TAC [`a:num`;`b:num`] THEN + REWRITE_TAC[] THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC; + + UNDISCH_TAC `equiv_input_states (s,s') x y z` THEN + REWRITE_TAC[equiv_input_states;C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES;BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + MAP_EVERY EXISTS_TAC [`a:num`;`b:num`] THEN + REWRITE_TAC[] THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P384_INTERM1_CORE_EXEC + ]; + + REPEAT GEN_TAC THEN STRIP_TAC THEN + ASM_REWRITE_TAC[] THEN ASM_MESON_TAC[equiv_output_states_TRANS]; + + SUBSUMED_MAYCHANGE_TAC + ]);; + + +(****************************************************************************** + Inducing BIGNUM_MONTMUL_P384_NEON_SUBROUTINE_CORRECT + from BIGNUM_MONTMUL_P384_CORE_CORRECT +******************************************************************************) + +(* Prove BIGNUM_MONTMUL_P384_CORE_CORRECT_N first *) + +let event_n_at_pc_goal = mk_eventually_n_at_pc_statement + `nonoverlapping + (word pc:int64, LENGTH + (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes)) + (z:int64,8 * 6)` + [`z:int64`;`x:int64`;`y:int64`] (*pc_mc_ofs*)0 + bignum_montmul_p384_core_mc (*pc_ofs*)0 + `\s0. C_ARGUMENTS [z;x;y] s0`;; + +let BIGNUM_MONTMUL_P384_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, + + REWRITE_TAC[LENGTH_APPEND;BIGNUM_MONTMUL_P384_CORE_EXEC;BARRIER_INST_BYTES_LENGTH] THEN + REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN + SUBGOAL_THEN `4 divides (LENGTH bignum_montmul_p384_core_mc)` + (fun th -> REWRITE_TAC[MATCH_MP aligned_bytes_loaded_append th; + BIGNUM_MONTMUL_P384_CORE_EXEC]) THENL [ + REWRITE_TAC[BIGNUM_MONTMUL_P384_CORE_EXEC] + THEN CONV_TAC NUM_DIVIDES_CONV + THEN NO_TAC; + ALL_TAC] THEN + REPEAT GEN_TAC THEN STRIP_TAC THEN + (* now start..! *) + X_GEN_TAC `s0:armstate` THEN GEN_TAC THEN STRIP_TAC THEN + (* eventually ==> eventually_n *) + PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC BIGNUM_MONTMUL_P384_CORE_EXEC);; + + +let BIGNUM_MONTMUL_P384_CORE_CORRECT_N = + prove_correct_n + BIGNUM_MONTMUL_P384_EXEC + BIGNUM_MONTMUL_P384_CORE_EXEC + BIGNUM_MONTMUL_P384_CORE_CORRECT + BIGNUM_MONTMUL_P384_EVENTUALLY_N_AT_PC;; + + +(* This theorem is a copy of BIGNUM_MONTMUL_P384_CORE_CORRECT, but with + - 'pc' replaced with 'pc2' + - LENGTH of bignum_montmul_p384_core_mc replaced with + bignum_montmul_p384_neon_core_m + - The MAYCHANGE set replaced with the Neon version's one *) +let BIGNUM_MONTMUL_P384_NEON_CORE_CORRECT = time prove( + `!z x y a b pc2. + nonoverlapping (word pc2,LENGTH bignum_montmul_p384_neon_core_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc2) bignum_montmul_p384_neon_core_mc /\ + read PC s = word (pc2) /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,6) s = a /\ + bignum_from_memory (y,6) s = b) + (\s. read PC s = word (pc2 + LENGTH bignum_montmul_p384_neon_core_mc) /\ + (a * b <= 2 EXP 384 * p_384 + ==> bignum_from_memory (z,6) s = + (inverse_mod p_384 (2 EXP 384) * a * b) MOD p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)])`, + + REPEAT GEN_TAC THEN + (* Prepare pc for the original program. *) + SUBGOAL_THEN + `?pc. + nonoverlapping (word pc, + LENGTH (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes)) (z:int64,8 * 6) /\ + nonoverlapping (word pc, + LENGTH (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes)) (x:int64,8 * 6) /\ + nonoverlapping (word pc, + LENGTH (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes)) (y:int64,8 * 6) /\ + 4 divides val (word pc:int64)` MP_TAC THENL [ + REWRITE_TAC[BIGNUM_MONTMUL_P384_CORE_EXEC;NONOVERLAPPING_CLAUSES;ALL; + LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH] THEN + FIND_HOLE_TAC; + + (** SUBGOAL 2 **) + ALL_TAC + ] THEN + + REPEAT_N 2 STRIP_TAC THEN + + VCGEN_EQUIV_TAC BIGNUM_MONTMUL_P384_CORE_EQUIV BIGNUM_MONTMUL_P384_CORE_CORRECT_N + [BIGNUM_MONTMUL_P384_CORE_EXEC] THEN + + (* unfold definitions that may block tactics *) + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P384_EXEC;BIGNUM_MONTMUL_P384_NEON_EXEC]) THEN + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Precond **) + X_GEN_TAC `s2:armstate` THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `4 divides val (word pc2:int64)` ASSUME_TAC THENL + [ FIRST_ASSUM (fun th -> + MP_TAC th THEN REWRITE_TAC[DIVIDES_4_VAL_WORD_64;aligned_bytes_loaded_word] + THEN METIS_TAC[]) THEN NO_TAC; ALL_TAC ] THEN + ASM_REWRITE_TAC[equiv_input_states] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc,LENGTH (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes))) + (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes) + (write PC (word pc) s2)` THEN + (* Expand variables appearing in the equiv relation *) + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC) THEN + (* Now has only one subgoal: the equivalence! *) + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + MAP_EVERY EXISTS_TAC [`a:num`;`b:num`] THEN + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC) THEN + NO_TAC; + + (** SUBGOAL 2. Postcond **) + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES; + BIGNUM_MONTMUL_P384_NEON_CORE_EXEC]; + + (** SUBGOAL 3. Frame **) + MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] + ]);; + +let BIGNUM_MONTMUL_P384_NEON_CORRECT = time prove( + `!z x y a b pc. + nonoverlapping (word pc,LENGTH bignum_montmul_p384_neon_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p384_neon_mc /\ + read PC s = word (pc+12) /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,6) s = a /\ + bignum_from_memory (y,6) s = b) + (\s. read PC s = word ((pc + 12) + LENGTH bignum_montmul_p384_neon_core_mc) /\ + (a * b <= 2 EXP 384 * p_384 + ==> bignum_from_memory (z,6) s = + (inverse_mod p_384 (2 EXP 384) * a * b) MOD p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)])`, + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_MONTMUL_P384_NEON_CORE_CORRECT + bignum_montmul_p384_neon_core_mc_def + [BIGNUM_MONTMUL_P384_NEON_EXEC;BIGNUM_MONTMUL_P384_NEON_CORE_EXEC]);; + +let BIGNUM_MONTMUL_P384_NEON_SUBROUTINE_CORRECT = time prove + (`!z x y a b pc stackpointer returnaddress. + aligned 16 stackpointer /\ + nonoverlapping (word pc,LENGTH bignum_montmul_p384_neon_mc) (z,8 * 6) /\ + ALL (nonoverlapping (word_sub stackpointer (word 48),48)) + [(word pc,LENGTH bignum_montmul_p384_neon_mc); (x,8 * 6); (y,8 * 6); (z,8 * 6)] + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p384_neon_mc /\ + read PC s = word pc /\ + read SP s = stackpointer /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,6) s = a /\ + bignum_from_memory (y,6) s = b) + (\s. read PC s = returnaddress /\ + (a * b <= 2 EXP 384 * p_384 + ==> bignum_from_memory (z,6) s = + (inverse_mod p_384 (2 EXP 384) * a * b) MOD p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6); + memory :> bytes(word_sub stackpointer (word 48),48)])`, + REWRITE_TAC[BIGNUM_MONTMUL_P384_NEON_EXEC] THEN + ARM_ADD_RETURN_STACK_TAC + BIGNUM_MONTMUL_P384_NEON_EXEC + (let th = REWRITE_RULE [BIGNUM_MONTMUL_P384_NEON_CORE_EXEC; + BIGNUM_MONTMUL_P384_NEON_EXEC;GSYM ADD_ASSOC] + BIGNUM_MONTMUL_P384_NEON_CORRECT in + CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV) th) + `[X19;X20;X21;X22;X23;X24]` 48);; + + +(****************************************************************************** + Inducing BIGNUM_AMONTMUL_P384_NEON_SUBROUTINE_CORRECT + from BIGNUM_AMONTMUL_P384_CORE_CORRECT +******************************************************************************) + +let BIGNUM_AMONTMUL_P384_CORE_CORRECT_N = + prove_correct_n + BIGNUM_MONTMUL_P384_EXEC + BIGNUM_MONTMUL_P384_CORE_EXEC + BIGNUM_AMONTMUL_P384_CORE_CORRECT + BIGNUM_MONTMUL_P384_EVENTUALLY_N_AT_PC;; + + +(* This theorem is a copy of BIGNUM_AMONTMUL_P384_CORE_CORRECT, but with + - 'pc' replaced with 'pc2' + - LENGTH of bignum_montmul_p384_core_mc with + bignum_montmul_p384_neon_core_m + - The MAYCHANGE set replaced with the Neon version's one *) +let BIGNUM_AMONTMUL_P384_NEON_CORE_CORRECT = time prove( + `!z x y a b pc2. + nonoverlapping (word pc2,LENGTH bignum_montmul_p384_neon_core_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc2) bignum_montmul_p384_neon_core_mc /\ + read PC s = word (pc2) /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,6) s = a /\ + bignum_from_memory (y,6) s = b) + (\s. read PC s = word (pc2 + LENGTH bignum_montmul_p384_neon_core_mc) /\ + (bignum_from_memory (z,6) s == + inverse_mod p_384 (2 EXP 384) * a * b) (mod p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)])`, + + REPEAT GEN_TAC THEN + (* Prepare pc for the original program. *) + SUBGOAL_THEN + `?pc. + nonoverlapping (word pc, + LENGTH (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes)) (z:int64,8 * 6) /\ + nonoverlapping (word pc, + LENGTH (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes)) (x:int64,8 * 6) /\ + nonoverlapping (word pc, + LENGTH (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes)) (y:int64,8 * 6) /\ + 4 divides val (word pc:int64)` MP_TAC THENL [ + REWRITE_TAC[BIGNUM_MONTMUL_P384_CORE_EXEC;NONOVERLAPPING_CLAUSES;ALL; + LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH] THEN + FIND_HOLE_TAC; + + (** SUBGOAL 2 **) + ALL_TAC + ] THEN + + REPEAT_N 2 STRIP_TAC THEN + + VCGEN_EQUIV_TAC BIGNUM_MONTMUL_P384_CORE_EQUIV BIGNUM_AMONTMUL_P384_CORE_CORRECT_N + [BIGNUM_MONTMUL_P384_CORE_EXEC] THEN + + (* unfold definitions that may block tactics *) + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES; + BIGNUM_MONTMUL_P384_EXEC;BIGNUM_MONTMUL_P384_NEON_EXEC]) THEN + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Precond **) + X_GEN_TAC `s2:armstate` THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `4 divides val (word pc2:int64)` ASSUME_TAC THENL + [ FIRST_ASSUM (fun th -> + MP_TAC th THEN REWRITE_TAC[DIVIDES_4_VAL_WORD_64;aligned_bytes_loaded_word] + THEN METIS_TAC[]) THEN NO_TAC; ALL_TAC ] THEN + ASM_REWRITE_TAC[equiv_input_states] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc,LENGTH (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes))) + (APPEND bignum_montmul_p384_core_mc barrier_inst_bytes) + (write PC (word pc) s2)` THEN + (* Expand variables appearing in the equiv relation *) + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC) THEN + (* Now has only one subgoal: the equivalence! *) + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + MAP_EVERY EXISTS_TAC [`a:num`;`b:num`] THEN + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTMUL_P384_CORE_EXEC) THEN + NO_TAC; + + (** SUBGOAL 2. Postcond **) + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES; + BIGNUM_MONTMUL_P384_NEON_CORE_EXEC]; + + (** SUBGOAL 3. Frame **) + MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] + ]);; + +let BIGNUM_AMONTMUL_P384_NEON_CORRECT = time prove( + `!z x y a b pc. + nonoverlapping (word pc,LENGTH bignum_montmul_p384_neon_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p384_neon_mc /\ + read PC s = word (pc+12) /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,6) s = a /\ + bignum_from_memory (y,6) s = b) + (\s. read PC s = word (pc + (12 + LENGTH bignum_montmul_p384_neon_core_mc)) /\ + (bignum_from_memory (z,6) s == + inverse_mod p_384 (2 EXP 384) * a * b) (mod p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)])`, + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_AMONTMUL_P384_NEON_CORE_CORRECT + bignum_montmul_p384_neon_core_mc_def + [BIGNUM_MONTMUL_P384_NEON_EXEC;BIGNUM_MONTMUL_P384_NEON_CORE_EXEC]);; + + +let BIGNUM_AMONTMUL_P384_NEON_SUBROUTINE_CORRECT = time prove + (`!z x y a b pc stackpointer returnaddress. + aligned 16 stackpointer /\ + nonoverlapping (word pc,LENGTH bignum_montmul_p384_neon_mc) (z,8 * 6) /\ + ALL (nonoverlapping (word_sub stackpointer (word 48),48)) + [(word pc,LENGTH bignum_montmul_p384_neon_mc); (x,8 * 6); (y,8 * 6); (z,8 * 6)] + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montmul_p384_neon_mc /\ + read PC s = word pc /\ + read SP s = stackpointer /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,6) s = a /\ + bignum_from_memory (y,6) s = b) + (\s. read PC s = returnaddress /\ + (bignum_from_memory (z,6) s == + inverse_mod p_384 (2 EXP 384) * a * b) (mod p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6); + memory :> bytes(word_sub stackpointer (word 48),48)])`, + REWRITE_TAC[BIGNUM_MONTMUL_P384_NEON_EXEC] THEN + ARM_ADD_RETURN_STACK_TAC + BIGNUM_MONTMUL_P384_NEON_EXEC + (let th = REWRITE_RULE [BIGNUM_MONTMUL_P384_NEON_CORE_EXEC; + BIGNUM_MONTMUL_P384_NEON_EXEC;GSYM ADD_ASSOC] + BIGNUM_AMONTMUL_P384_NEON_CORRECT in + CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV) th) + `[X19;X20;X21;X22;X23;X24]` 48);; diff --git a/arm/proofs/bignum_montsqr_p256.ml b/arm/proofs/bignum_montsqr_p256.ml index c4be0cc7e..2ee1b53e1 100644 --- a/arm/proofs/bignum_montsqr_p256.ml +++ b/arm/proofs/bignum_montsqr_p256.ml @@ -203,13 +203,13 @@ let lemma2 = prove let BIGNUM_MONTSQR_P256_CORE_CORRECT = time prove (`!z x a pc. - nonoverlapping (word pc,0x1f4) (z,8 * 4) + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_core_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_core_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,4) s = a) - (\s. read PC s = word (pc + 0x1f0) /\ + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p256_core_mc) /\ (a EXP 2 <= 2 EXP 256 * p_256 ==> bignum_from_memory (z,4) s = (inverse_mod p_256 (2 EXP 256) * a EXP 2) MOD p_256)) @@ -219,7 +219,8 @@ let BIGNUM_MONTSQR_P256_CORE_CORRECT = time prove MAYCHANGE SOME_FLAGS)`, MAP_EVERY X_GEN_TAC [`z:int64`; `x:int64`; `a:num`; `pc:num`] THEN - REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES; + BIGNUM_MONTSQR_P256_CORE_EXEC] THEN DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN (*** Globalize the a EXP 2 <= 2 EXP 256 * p_256 assumption ***) @@ -370,13 +371,13 @@ let BIGNUM_MONTSQR_P256_CORE_CORRECT = time prove let BIGNUM_MONTSQR_P256_CORRECT = time prove (`!z x a pc. - nonoverlapping (word pc,0x1f4) (z,8 * 4) + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,4) s = a) - (\s. read PC s = word (pc + 0x1f0) /\ + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p256_core_mc) /\ (a EXP 2 <= 2 EXP 256 * p_256 ==> bignum_from_memory (z,4) s = (inverse_mod p_256 (2 EXP 256) * a EXP 2) MOD p_256)) @@ -384,16 +385,10 @@ let BIGNUM_MONTSQR_P256_CORRECT = time prove X13; X14; X15; X16; X17] ,, MAYCHANGE [memory :> bytes(z,8 * 4)] ,, MAYCHANGE SOME_FLAGS)`, - REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_MONTSQR_P256_CORE_CORRECT th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - EXISTS_TAC `x:int64` THEN ASM_REWRITE_TAC[] THEN - REWRITE_TAC[bignum_montsqr_p256_core_mc_def;BIGNUM_MONTSQR_P256_EXEC; - WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN - ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_MONTSQR_P256_CORE_CORRECT + bignum_montsqr_p256_core_mc_def + [BIGNUM_MONTSQR_P256_CORE_EXEC;BIGNUM_MONTSQR_P256_EXEC]);; let BIGNUM_MONTSQR_P256_SUBROUTINE_CORRECT = time prove (`!z x a pc returnaddress. @@ -410,8 +405,10 @@ let BIGNUM_MONTSQR_P256_SUBROUTINE_CORRECT = time prove (inverse_mod p_256 (2 EXP 256) * a EXP 2) MOD p_256)) (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 4)])`, - ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTSQR_P256_EXEC - BIGNUM_MONTSQR_P256_CORRECT);; + ARM_ADD_RETURN_NOSTACK_TAC + BIGNUM_MONTSQR_P256_EXEC + (REWRITE_RULE [BIGNUM_MONTSQR_P256_EXEC;BIGNUM_MONTSQR_P256_CORE_EXEC] + BIGNUM_MONTSQR_P256_CORRECT));; (* ------------------------------------------------------------------------- *) (* Show that it also works as "almost-Montgomery" if desired. That is, even *) @@ -421,13 +418,13 @@ let BIGNUM_MONTSQR_P256_SUBROUTINE_CORRECT = time prove let BIGNUM_AMONTSQR_P256_CORE_CORRECT = time prove (`!z x a pc. - nonoverlapping (word pc,0x1f4) (z,8 * 4) + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_core_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_core_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,4) s = a) - (\s. read PC s = word (pc + 0x1f0) /\ + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p256_core_mc) /\ (bignum_from_memory (z,4) s == inverse_mod p_256 (2 EXP 256) * a EXP 2) (mod p_256)) (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; @@ -436,7 +433,8 @@ let BIGNUM_AMONTSQR_P256_CORE_CORRECT = time prove MAYCHANGE SOME_FLAGS)`, MAP_EVERY X_GEN_TAC [`z:int64`; `x:int64`; `a:num`; `pc:num`] THEN - REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES; + BIGNUM_MONTSQR_P256_CORE_EXEC] THEN DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN ENSURES_INIT_TAC "s0" THEN BIGNUM_DIGITIZE_TAC "x_" `bignum_from_memory (x,4) s0` THEN @@ -585,29 +583,23 @@ let BIGNUM_AMONTSQR_P256_CORE_CORRECT = time prove let BIGNUM_AMONTSQR_P256_CORRECT = time prove (`!z x a pc. - nonoverlapping (word pc,0x1f4) (z,8 * 4) + nonoverlapping (word pc,LENGTH bignum_montsqr_p256_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p256_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,4) s = a) - (\s. read PC s = word (pc + 0x1f0) /\ + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p256_core_mc) /\ (bignum_from_memory (z,4) s == inverse_mod p_256 (2 EXP 256) * a EXP 2) (mod p_256)) (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; X13; X14; X15; X16; X17] ,, MAYCHANGE [memory :> bytes(z,8 * 4)] ,, MAYCHANGE SOME_FLAGS)`, - REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_AMONTSQR_P256_CORE_CORRECT th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - EXISTS_TAC `x:int64` THEN ASM_REWRITE_TAC[] THEN - REWRITE_TAC[bignum_montsqr_p256_core_mc_def;BIGNUM_MONTSQR_P256_EXEC; - WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN - ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_AMONTSQR_P256_CORE_CORRECT + bignum_montsqr_p256_core_mc_def + [BIGNUM_MONTSQR_P256_CORE_EXEC;BIGNUM_MONTSQR_P256_EXEC]);; let BIGNUM_AMONTSQR_P256_SUBROUTINE_CORRECT = time prove (`!z x a pc returnaddress. @@ -624,4 +616,5 @@ let BIGNUM_AMONTSQR_P256_SUBROUTINE_CORRECT = time prove (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 4)])`, ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTSQR_P256_EXEC - BIGNUM_AMONTSQR_P256_CORRECT);; + (REWRITE_RULE [BIGNUM_MONTSQR_P256_EXEC;BIGNUM_MONTSQR_P256_CORE_EXEC] + BIGNUM_AMONTSQR_P256_CORRECT));; diff --git a/arm/proofs/bignum_montsqr_p256_neon.ml b/arm/proofs/bignum_montsqr_p256_neon.ml index a287fcb7d..912263dda 100644 --- a/arm/proofs/bignum_montsqr_p256_neon.ml +++ b/arm/proofs/bignum_montsqr_p256_neon.ml @@ -212,8 +212,8 @@ let actions2 = [ let equiv_goal1 = mk_equiv_statement `ALL (nonoverlapping (z:int64,8 * 4)) - [(word pc:int64,LENGTH bignum_montsqr_p256_mc); - (word pc2:int64,LENGTH bignum_montsqr_p256_interm1_mc)]` + [(word pc:int64,LENGTH bignum_montsqr_p256_core_mc); + (word pc2:int64,LENGTH bignum_montsqr_p256_interm1_core_mc)]` equiv_input_states equiv_output_states bignum_montsqr_p256_core_mc 0 @@ -331,12 +331,11 @@ extra_word_CONV := WORD_MUL64_LO]] @ (!extra_word_CONV);; -let BIGNUM_MONTSQR_P256_EQUIV1 = prove(equiv_goal1, +let BIGNUM_MONTSQR_P256_CORE_EQUIV1 = prove(equiv_goal1, REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; - BIGNUM_MONTSQR_P256_EXEC;BIGNUM_MONTSQR_P256_CORE_EXEC; - BIGNUM_MONTSQR_P256_INTERM1_EXEC; + BIGNUM_MONTSQR_P256_CORE_EXEC; BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC] THEN REPEAT STRIP_TAC THEN (** Initialize **) @@ -419,8 +418,8 @@ let bignum_montsqr_p256_neon_core_mc_def, let equiv_goal2 = mk_equiv_statement `ALL (nonoverlapping (z:int64,8 * 4)) - [(word pc:int64,LENGTH bignum_montsqr_p256_interm1_mc); - (word pc2:int64,LENGTH bignum_montsqr_p256_neon_mc)]` + [(word pc:int64,LENGTH bignum_montsqr_p256_interm1_core_mc); + (word pc2:int64,LENGTH bignum_montsqr_p256_neon_core_mc)]` equiv_input_states equiv_output_states bignum_montsqr_p256_interm1_core_mc 0 @@ -442,13 +441,12 @@ let inst_map = [ (* (state number, (equation, fresh var)) *) let state_to_abbrevs: (int * thm) list ref = ref [];; -let BIGNUM_MONTSQR_P256_EQUIV2 = prove( +let BIGNUM_MONTSQR_P256_CORE_EQUIV2 = prove( equiv_goal2, REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; - BIGNUM_MONTSQR_P256_INTERM1_EXEC;BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC; - BIGNUM_MONTSQR_P256_NEON_EXEC; + BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC; BIGNUM_MONTSQR_P256_NEON_CORE_EXEC] THEN REPEAT STRIP_TAC THEN (** Initialize **) @@ -491,8 +489,8 @@ let BIGNUM_MONTSQR_P256_EQUIV2 = prove( let equiv_goal = mk_equiv_statement `ALL (nonoverlapping (z:int64,8 * 4)) - [(word pc:int64,LENGTH bignum_montsqr_p256_mc); - (word pc2:int64,LENGTH bignum_montsqr_p256_neon_mc)]` + [(word pc:int64,LENGTH bignum_montsqr_p256_core_mc); + (word pc2:int64,LENGTH bignum_montsqr_p256_neon_core_mc)]` equiv_input_states equiv_output_states bignum_montsqr_p256_core_mc 0 @@ -510,27 +508,26 @@ let equiv_output_states_TRANS = prove( ==> equiv_output_states (s,s2) z`, MESON_TAC[equiv_output_states]);; -let BIGNUM_MONTSQR_P256_EQUIV = prove(equiv_goal, +let BIGNUM_MONTSQR_P256_CORE_EQUIV = prove(equiv_goal, REPEAT STRIP_TAC THEN SUBGOAL_THEN `?pc3. ALL (nonoverlapping (z,8 * 4)) - [word pc:int64, LENGTH bignum_montsqr_p256_mc; - word pc3:int64, LENGTH bignum_montsqr_p256_interm1_mc] /\ + [word pc:int64, LENGTH bignum_montsqr_p256_core_mc; + word pc3:int64, LENGTH bignum_montsqr_p256_interm1_core_mc] /\ ALL (nonoverlapping (z,8 * 4)) - [word pc3:int64, LENGTH bignum_montsqr_p256_interm1_mc; - word pc2:int64, LENGTH bignum_montsqr_p256_neon_mc] /\ + [word pc3:int64, LENGTH bignum_montsqr_p256_interm1_core_mc; + word pc2:int64, LENGTH bignum_montsqr_p256_neon_core_mc] /\ nonoverlapping (x,8 * 4) - (word pc3:int64, LENGTH bignum_montsqr_p256_interm1_mc) /\ + (word pc3:int64, LENGTH bignum_montsqr_p256_interm1_core_mc) /\ 4 divides val (word pc3:int64)` MP_TAC THENL [ FIRST_X_ASSUM MP_TAC THEN - CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN ASM_REWRITE_TAC [ALL;NONOVERLAPPING_CLAUSES; - BIGNUM_MONTSQR_P256_INTERM1_EXEC;BIGNUM_MONTSQR_P256_NEON_EXEC; - BIGNUM_MONTSQR_P256_EXEC;GSYM CONJ_ASSOC] THEN + BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC;BIGNUM_MONTSQR_P256_NEON_CORE_EXEC; + BIGNUM_MONTSQR_P256_CORE_EXEC;GSYM CONJ_ASSOC] THEN STRIP_TAC THEN ASM_REWRITE_TAC[] THEN FIND_HOLE_TAC; @@ -539,8 +536,8 @@ let BIGNUM_MONTSQR_P256_EQUIV = prove(equiv_goal, ] THEN STRIP_TAC THEN - FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTSQR_P256_EQUIV1 th))) THEN - FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTSQR_P256_EQUIV2 th))) THEN + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTSQR_P256_CORE_EQUIV1 th))) THEN + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTSQR_P256_CORE_EQUIV2 th))) THEN FIRST_X_ASSUM (fun c1 -> FIRST_X_ASSUM (fun c2 -> MP_TAC (REWRITE_RULE [] (MATCH_MP ENSURES2_CONJ2 (CONJ c1 c2))) @@ -549,8 +546,9 @@ let BIGNUM_MONTSQR_P256_EQUIV = prove(equiv_goal, (* break 'ALL nonoverlapping' in assumptions *) RULE_ASSUM_TAC (REWRITE_RULE[ ALLPAIRS;ALL; - BIGNUM_MONTSQR_P256_EXEC;BIGNUM_MONTSQR_P256_NEON_EXEC; - BIGNUM_MONTSQR_P256_INTERM1_EXEC; + BIGNUM_MONTSQR_P256_CORE_EXEC; + BIGNUM_MONTSQR_P256_NEON_CORE_EXEC; + BIGNUM_MONTSQR_P256_INTERM1_CORE_EXEC; NONOVERLAPPING_CLAUSES]) THEN SPLIT_FIRST_CONJ_ASSUM_TAC THEN @@ -607,15 +605,20 @@ let BIGNUM_MONTSQR_P256_EQUIV = prove(equiv_goal, +(****************************************************************************** + Inducing BIGNUM_MONTSQR_P256_NEON_SUBROUTINE_CORRECT + from BIGNUM_MONTSQR_P256_CORE_CORRECT +******************************************************************************) + let event_n_at_pc_goal = mk_eventually_n_at_pc_statement - `nonoverlapping (word pc:int64,LENGTH bignum_montsqr_p256_mc) (z:int64,8 * 4)` + `nonoverlapping (word pc:int64,LENGTH + (APPEND bignum_montsqr_p256_core_mc barrier_inst_bytes)) (z:int64,8 * 4)` [`z:int64`;`x:int64`] (*pc_mc_ofs*)0 bignum_montsqr_p256_core_mc (*pc_ofs*)0 `\s0. C_ARGUMENTS [z;x] s0`;; let BIGNUM_MONTSQR_P256_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, - REWRITE_TAC[LENGTH_APPEND;BIGNUM_MONTSQR_P256_CORE_EXEC;BIGNUM_MONTSQR_P256_EXEC; - BARRIER_INST_BYTES_LENGTH] THEN + REWRITE_TAC[LENGTH_APPEND;BIGNUM_MONTSQR_P256_CORE_EXEC;BARRIER_INST_BYTES_LENGTH] THEN REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN SUBGOAL_THEN `4 divides (LENGTH bignum_montsqr_p256_core_mc)` (fun th -> REWRITE_TAC[MATCH_MP aligned_bytes_loaded_append th; @@ -630,11 +633,6 @@ let BIGNUM_MONTSQR_P256_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC BIGNUM_MONTSQR_P256_CORE_EXEC);; -(****************************************************************************** - Inducing BIGNUM_MONTSQR_P256_NEON_SUBROUTINE_CORRECT - from BIGNUM_MONTSQR_P256_CORE_CORRECT -******************************************************************************) - let BIGNUM_MONTSQR_P256_CORE_CORRECT_N = prove_correct_n BIGNUM_MONTSQR_P256_EXEC @@ -645,13 +643,13 @@ let BIGNUM_MONTSQR_P256_CORE_CORRECT_N = let BIGNUM_MONTSQR_P256_NEON_CORE_CORRECT = prove( `!z x a pc2. - nonoverlapping (word pc2,LENGTH bignum_montsqr_p256_neon_mc) (z,8 * 4) + nonoverlapping (word pc2,LENGTH bignum_montsqr_p256_neon_core_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc2) bignum_montsqr_p256_neon_core_mc /\ read PC s = word pc2 /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,4) s = a) - (\s. read PC s = word (pc2 + 544) /\ + (\s. read PC s = word (pc2 + LENGTH bignum_montsqr_p256_neon_core_mc) /\ (a EXP 2 <= 2 EXP 256 * p_256 ==> bignum_from_memory (z,4) s = (inverse_mod p_256 (2 EXP 256) * a EXP 2) MOD p_256)) @@ -662,11 +660,13 @@ let BIGNUM_MONTSQR_P256_NEON_CORE_CORRECT = prove( (* Prepare pc for the original program. *) SUBGOAL_THEN `?pc. - nonoverlapping (word pc,LENGTH bignum_montsqr_p256_mc) (z:int64,8 * 4) /\ - nonoverlapping (word pc,LENGTH bignum_montsqr_p256_mc) (x:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH + (APPEND bignum_montsqr_p256_core_mc barrier_inst_bytes)) (z:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH + (APPEND bignum_montsqr_p256_core_mc barrier_inst_bytes)) (x:int64,8 * 4) /\ 4 divides val (word pc:int64)` MP_TAC THENL [ - REWRITE_TAC[BIGNUM_MONTSQR_P256_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN - CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + REWRITE_TAC[LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH; + BIGNUM_MONTSQR_P256_CORE_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN FIND_HOLE_TAC; (** SUBGOAL 2 **) @@ -674,13 +674,13 @@ let BIGNUM_MONTSQR_P256_NEON_CORE_CORRECT = prove( ] THEN REPEAT_N 2 STRIP_TAC THEN - - VCGEN_EQUIV_TAC BIGNUM_MONTSQR_P256_EQUIV BIGNUM_MONTSQR_P256_CORE_CORRECT_N - [BIGNUM_MONTSQR_P256_EXEC;NONOVERLAPPING_CLAUSES] THEN + + VCGEN_EQUIV_TAC BIGNUM_MONTSQR_P256_CORE_EQUIV BIGNUM_MONTSQR_P256_CORE_CORRECT_N + [BIGNUM_MONTSQR_P256_CORE_EXEC;BIGNUM_MONTSQR_P256_NEON_CORE_EXEC] THEN (* unfold definitions that may block tactics *) - RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTSQR_P256_EXEC; - BIGNUM_MONTSQR_P256_NEON_EXEC]) THEN + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTSQR_P256_CORE_EXEC; + BIGNUM_MONTSQR_P256_NEON_CORE_EXEC]) THEN REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN REPEAT CONJ_TAC THENL [ (** SUBGOAL 1. Precond **) @@ -705,7 +705,8 @@ let BIGNUM_MONTSQR_P256_NEON_CORE_CORRECT = prove( TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC); (** SUBGOAL 2. Postcond **) - MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES; + BIGNUM_MONTSQR_P256_CORE_EXEC;BIGNUM_MONTSQR_P256_NEON_CORE_EXEC]; (** SUBGOAL 3. Frame **) MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] @@ -719,22 +720,16 @@ let BIGNUM_MONTSQR_P256_NEON_CORRECT = time prove read PC s = word pc /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,4) s = a) - (\s. read PC s = word (pc + 544) /\ + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p256_neon_core_mc) /\ (a EXP 2 <= 2 EXP 256 * p_256 ==> bignum_from_memory (z,4) s = (inverse_mod p_256 (2 EXP 256) * a EXP 2) MOD p_256)) (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 4)])`, - REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_MONTSQR_P256_NEON_CORE_CORRECT th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - EXISTS_TAC `x:int64` THEN ASM_REWRITE_TAC[] THEN - REWRITE_TAC[bignum_montsqr_p256_neon_core_mc_def;BIGNUM_MONTSQR_P256_NEON_EXEC; - WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN - ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_MONTSQR_P256_NEON_CORE_CORRECT + bignum_montsqr_p256_neon_core_mc_def + [BIGNUM_MONTSQR_P256_NEON_EXEC;BIGNUM_MONTSQR_P256_NEON_CORE_EXEC]);; let BIGNUM_MONTSQR_P256_NEON_SUBROUTINE_CORRECT = time prove (`!z x a pc returnaddress. @@ -753,7 +748,7 @@ let BIGNUM_MONTSQR_P256_NEON_SUBROUTINE_CORRECT = time prove MAYCHANGE [memory :> bytes(z,8 * 4)])`, REWRITE_TAC[BIGNUM_MONTSQR_P256_NEON_EXEC] THEN ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTSQR_P256_NEON_EXEC - (REWRITE_RULE[BIGNUM_MONTSQR_P256_NEON_EXEC] BIGNUM_MONTSQR_P256_NEON_CORRECT));; + (REWRITE_RULE[BIGNUM_MONTSQR_P256_NEON_EXEC;BIGNUM_MONTSQR_P256_NEON_CORE_EXEC] BIGNUM_MONTSQR_P256_NEON_CORRECT));; (****************************************************************************** @@ -770,13 +765,13 @@ let BIGNUM_AMONTSQR_P256_CORE_CORRECT_N = let BIGNUM_AMONTSQR_P256_NEON_CORE_CORRECT = prove( `!z x a pc2. - nonoverlapping (word pc2,LENGTH bignum_montsqr_p256_neon_mc) (z,8 * 4) + nonoverlapping (word pc2,LENGTH bignum_montsqr_p256_neon_core_mc) (z,8 * 4) ==> ensures arm (\s. aligned_bytes_loaded s (word pc2) bignum_montsqr_p256_neon_core_mc /\ read PC s = word pc2 /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,4) s = a) - (\s. read PC s = word (pc2 + 544) /\ + (\s. read PC s = word (pc2 + LENGTH bignum_montsqr_p256_neon_core_mc) /\ (bignum_from_memory (z,4) s == inverse_mod p_256 (2 EXP 256) * a EXP 2) (mod p_256)) (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, @@ -786,11 +781,13 @@ let BIGNUM_AMONTSQR_P256_NEON_CORE_CORRECT = prove( (* Prepare pc for the original program. *) SUBGOAL_THEN `?pc. - nonoverlapping (word pc,LENGTH bignum_montsqr_p256_mc) (z:int64,8 * 4) /\ - nonoverlapping (word pc,LENGTH bignum_montsqr_p256_mc) (x:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH + (APPEND bignum_montsqr_p256_core_mc barrier_inst_bytes)) (z:int64,8 * 4) /\ + nonoverlapping (word pc,LENGTH + (APPEND bignum_montsqr_p256_core_mc barrier_inst_bytes)) (x:int64,8 * 4) /\ 4 divides val (word pc:int64)` MP_TAC THENL [ - REWRITE_TAC[BIGNUM_MONTSQR_P256_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN - CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + REWRITE_TAC[LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH; + BIGNUM_MONTSQR_P256_CORE_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN FIND_HOLE_TAC; (** SUBGOAL 2 **) @@ -799,12 +796,12 @@ let BIGNUM_AMONTSQR_P256_NEON_CORE_CORRECT = prove( REPEAT_N 2 STRIP_TAC THEN - VCGEN_EQUIV_TAC BIGNUM_MONTSQR_P256_EQUIV BIGNUM_AMONTSQR_P256_CORE_CORRECT_N - [BIGNUM_MONTSQR_P256_EXEC;NONOVERLAPPING_CLAUSES] THEN + VCGEN_EQUIV_TAC BIGNUM_MONTSQR_P256_CORE_EQUIV BIGNUM_AMONTSQR_P256_CORE_CORRECT_N + [BIGNUM_MONTSQR_P256_CORE_EXEC;BIGNUM_MONTSQR_P256_NEON_CORE_EXEC] THEN (* unfold definitions that may block tactics *) - RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTSQR_P256_EXEC; - BIGNUM_MONTSQR_P256_NEON_EXEC]) THEN + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTSQR_P256_CORE_EXEC; + BIGNUM_MONTSQR_P256_NEON_CORE_EXEC]) THEN REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN REPEAT CONJ_TAC THENL [ (** SUBGOAL 1. Precond **) @@ -829,7 +826,8 @@ let BIGNUM_AMONTSQR_P256_NEON_CORE_CORRECT = prove( TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P256_CORE_EXEC); (** SUBGOAL 2. Postcond **) - MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES; + BIGNUM_MONTSQR_P256_CORE_EXEC;BIGNUM_MONTSQR_P256_NEON_CORE_EXEC]; (** SUBGOAL 3. Frame **) MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] @@ -843,21 +841,14 @@ let BIGNUM_AMONTSQR_P256_NEON_CORRECT = time prove read PC s = word pc /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,4) s = a) - (\s. read PC s = word (pc + 544) /\ + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p256_neon_core_mc) /\ (bignum_from_memory (z,4) s == inverse_mod p_256 (2 EXP 256) * a EXP 2) (mod p_256)) (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 4)])`, - REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_AMONTSQR_P256_NEON_CORE_CORRECT th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - EXISTS_TAC `x:int64` THEN ASM_REWRITE_TAC[] THEN - REWRITE_TAC[bignum_montsqr_p256_neon_core_mc_def;BIGNUM_MONTSQR_P256_NEON_EXEC; - WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - ONCE_REWRITE_TAC[WORD_RULE `word pc:int64 = word_add (word pc) (word 0)`] THEN - ASM_SIMP_TAC[ALIGNED_BYTES_LOADED_SUB_LIST;WORD_ADD_0;NUM_DIVIDES_CONV`4 divides 0`]);; + ARM_SUB_LIST_OF_MC_TAC BIGNUM_AMONTSQR_P256_NEON_CORE_CORRECT + bignum_montsqr_p256_neon_core_mc_def + [BIGNUM_MONTSQR_P256_NEON_EXEC;BIGNUM_MONTSQR_P256_NEON_CORE_EXEC]);; let BIGNUM_AMONTSQR_P256_NEON_SUBROUTINE_CORRECT = time prove (`!z x a pc returnaddress. @@ -875,5 +866,5 @@ let BIGNUM_AMONTSQR_P256_NEON_SUBROUTINE_CORRECT = time prove MAYCHANGE [memory :> bytes(z,8 * 4)])`, REWRITE_TAC[BIGNUM_MONTSQR_P256_NEON_EXEC] THEN ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTSQR_P256_NEON_EXEC - (REWRITE_RULE[BIGNUM_MONTSQR_P256_NEON_EXEC] BIGNUM_AMONTSQR_P256_NEON_CORRECT));; + (REWRITE_RULE[BIGNUM_MONTSQR_P256_NEON_EXEC;BIGNUM_MONTSQR_P256_NEON_CORE_EXEC] BIGNUM_AMONTSQR_P256_NEON_CORRECT));; diff --git a/arm/proofs/bignum_montsqr_p384.ml b/arm/proofs/bignum_montsqr_p384.ml index 6a91e92f5..14dc88163 100644 --- a/arm/proofs/bignum_montsqr_p384.ml +++ b/arm/proofs/bignum_montsqr_p384.ml @@ -278,6 +278,15 @@ let bignum_montsqr_p384_mc = let BIGNUM_MONTSQR_P384_EXEC = ARM_MK_EXEC_RULE bignum_montsqr_p384_mc;; +(* bignum_montsqr_p384_mc without ret. *) +let bignum_montsqr_p384_core_mc_def, + bignum_montsqr_p384_core_mc, + BIGNUM_MONTSQR_P384_CORE_EXEC = + mk_sublist_of_mc "bignum_montsqr_p384_core_mc" + bignum_montsqr_p384_mc + (`0`,`LENGTH bignum_montsqr_p384_mc - 4`) + BIGNUM_MONTSQR_P384_EXEC;; + (* ------------------------------------------------------------------------- *) (* Proof. *) (* ------------------------------------------------------------------------- *) @@ -526,15 +535,15 @@ let montred_subst_tac execth regs n = DISCH_THEN(SUBST_ALL_TAC o MATCH_MP (REAL_ARITH `a:real = b + c ==> b = a - c`));; -let BIGNUM_MONTSQR_P384_CORRECT = time prove +let BIGNUM_MONTSQR_P384_CORE_CORRECT = time prove (`!z x a pc. - nonoverlapping (word pc,0x414) (z,8 * 6) + nonoverlapping (word pc,LENGTH bignum_montsqr_p384_core_mc) (z,8 * 6) ==> ensures arm - (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p384_mc /\ + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p384_core_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,6) s = a) - (\s. read PC s = word (pc + 0x410) /\ + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p384_core_mc) /\ (a EXP 2 <= 2 EXP 384 * p_384 ==> bignum_from_memory (z,6) s = (inverse_mod p_384 (2 EXP 384) * a EXP 2) MOD p_384)) @@ -544,19 +553,20 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove MAYCHANGE SOME_FLAGS)`, MAP_EVERY X_GEN_TAC [`z:int64`; `x:int64`; `a:num`; `pc:num`] THEN - REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES; + fst (CONJ_PAIR BIGNUM_MONTSQR_P384_CORE_EXEC)] THEN DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN (*** Globalize the a EXP 2 <= 2 EXP 384 * p_384 assumption ***) ASM_CASES_TAC `a EXP 2 <= 2 EXP 384 * p_384` THENL - [ASM_REWRITE_TAC[]; ARM_SIM_TAC BIGNUM_MONTSQR_P384_EXEC (1--260)] THEN + [ASM_REWRITE_TAC[]; ARM_SIM_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (1--260)] THEN ENSURES_INIT_TAC "s0" THEN BIGNUM_DIGITIZE_TAC "x_" `bignum_from_memory (x,6) s0` THEN (*** Squaring of the lower half ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC (1--28) (1--28) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (1--28) (1--28) THEN SUBGOAL_THEN `bignum_of_wordlist[x_0; x_1; x_2] EXP 2 = bignum_of_wordlist [mullo_s7; sum_s24; sum_s25; sum_s26; sum_s27; sum_s28]` @@ -569,7 +579,7 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove (*** Three short Montgomery reductions ***) - montred_tac BIGNUM_MONTSQR_P384_EXEC + montred_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X8;X13;X12;X11;X10;X9;X8; X14;X15;X16]` 28 THEN REPLICATE_TAC 2 (FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC (LAND_CONV o RAND_CONV o LAND_CONV o RAND_CONV) @@ -577,9 +587,9 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove DISCH_THEN(ASSUME_TAC o MATCH_MP (REAL_ARITH `a:real = b + c ==> a - c = b`)) THEN - montred_subst_tac BIGNUM_MONTSQR_P384_EXEC + montred_subst_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X9;X8;X13;X12;X11;X10;X9; X14;X15;X16]` 43 THEN - montred_subst_tac BIGNUM_MONTSQR_P384_EXEC + montred_subst_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X10;X9;X8;X13;X12;X11;X10; X14;X15;X16]` 58 THEN ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC) THEN DISCARD_MATCHING_ASSUMPTIONS [`word a = b`] THEN @@ -587,11 +597,11 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove (*** Three stashing stores ***) - ARM_STEPS_TAC BIGNUM_MONTSQR_P384_EXEC [74;75;76] THEN + ARM_STEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC [74;75;76] THEN (*** ADK cross-product ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC ([77;78;79] @ (83--93) @ [99] @ (105--109) @ [115] @ (121--124) @ [130] @ (136--138)) (77--138) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES]) THEN @@ -625,7 +635,7 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove (*** Double the cross-product and add the Montgomerified lower square ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC (139--155) (139--155) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (139--155) (139--155) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; VAL_WORD_BITVAL]) THEN SUBGOAL_THEN @@ -647,7 +657,7 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove (*** Three more Montgomery reductions on that sum ***) - montred_tac BIGNUM_MONTSQR_P384_EXEC + montred_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X8;X13;X12;X11;X10;X9;X8; X2;X3;X4]` 155 THEN FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC (LAND_CONV o RAND_CONV o LAND_CONV o RAND_CONV) @@ -655,10 +665,10 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove DISCH_THEN(ASSUME_TAC o MATCH_MP (REAL_ARITH `a:real = b + c ==> a - c = b`)) THEN - montred_subst_tac BIGNUM_MONTSQR_P384_EXEC + montred_subst_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X9;X8;X13;X12;X11;X10;X9; X2;X3;X4]` 170 THEN - montred_subst_tac BIGNUM_MONTSQR_P384_EXEC + montred_subst_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X10;X9;X8;X13;X12;X11;X10; X2;X3;X4]` 185 THEN ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC) THEN @@ -667,7 +677,7 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove (*** Montgomery accumulation and addition of the high square ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC (201--237) (201--237) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (201--237) (201--237) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; VAL_WORD_BITVAL]) THEN (*** Main pre-reduced result ***) @@ -729,7 +739,7 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove (*** Final comparison ****) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC (238--247) (238--247) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (238--247) (238--247) THEN SUBGOAL_THEN `sum_s247:int64 = word(bitval(p_384 <= t))` SUBST_ALL_TAC THENL @@ -747,10 +757,10 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove (*** Corrective masked subtraction ***) - ARM_STEPS_TAC BIGNUM_MONTSQR_P384_EXEC [248] THEN + ARM_STEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC [248] THEN RULE_ASSUM_TAC(REWRITE_RULE[WORD_RULE `word_sub (word 0) x:int64 = word_neg x`]) THEN - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC (249--260) (249--260) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (249--260) (249--260) THEN ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN ASM_REWRITE_TAC[] THEN TRANS_TAC EQ_TRANS `t MOD p_384` THEN CONJ_TAC THENL @@ -782,6 +792,27 @@ let BIGNUM_MONTSQR_P384_CORRECT = time prove CONV_TAC WORD_REDUCE_CONV THEN DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC);; +let BIGNUM_MONTSQR_P384_CORRECT = time prove( + `!z x a pc. + nonoverlapping (word pc,LENGTH bignum_montsqr_p384_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p384_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,6) s = a) + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p384_core_mc) /\ + (a EXP 2 <= 2 EXP 384 * p_384 + ==> bignum_from_memory (z,6) s = + (inverse_mod p_384 (2 EXP 384) * a EXP 2) MOD p_384)) + (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)] ,, + MAYCHANGE SOME_FLAGS)`, + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_MONTSQR_P384_CORE_CORRECT + bignum_montsqr_p384_core_mc_def + [BIGNUM_MONTSQR_P384_CORE_EXEC;BIGNUM_MONTSQR_P384_EXEC]);; + let BIGNUM_MONTSQR_P384_SUBROUTINE_CORRECT = time prove (`!z x a pc returnaddress. nonoverlapping (word pc,0x414) (z,8 * 6) @@ -798,7 +829,8 @@ let BIGNUM_MONTSQR_P384_SUBROUTINE_CORRECT = time prove (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 6)])`, ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTSQR_P384_EXEC - BIGNUM_MONTSQR_P384_CORRECT);; + (REWRITE_RULE[BIGNUM_MONTSQR_P384_CORE_EXEC;BIGNUM_MONTSQR_P384_EXEC] + BIGNUM_MONTSQR_P384_CORRECT));; (* ------------------------------------------------------------------------- *) (* Show that it also works as "almost-Montgomery" if desired. That is, even *) @@ -806,15 +838,15 @@ let BIGNUM_MONTSQR_P384_SUBROUTINE_CORRECT = time prove (* But the output, still 384 bits, may then not be fully reduced mod p_384. *) (* ------------------------------------------------------------------------- *) -let BIGNUM_AMONTSQR_P384_CORRECT = time prove +let BIGNUM_AMONTSQR_P384_CORE_CORRECT = time prove (`!z x a pc. - nonoverlapping (word pc,0x414) (z,8 * 6) + nonoverlapping (word pc,LENGTH bignum_montsqr_p384_core_mc) (z,8 * 6) ==> ensures arm - (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p384_mc /\ + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p384_core_mc /\ read PC s = word pc /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,6) s = a) - (\s. read PC s = word (pc + 0x410) /\ + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p384_core_mc) /\ (bignum_from_memory (z,6) s == inverse_mod p_384 (2 EXP 384) * a EXP 2) (mod p_384)) (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; @@ -823,14 +855,14 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove MAYCHANGE SOME_FLAGS)`, MAP_EVERY X_GEN_TAC [`z:int64`; `x:int64`; `a:num`; `pc:num`] THEN - REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES;BIGNUM_MONTSQR_P384_CORE_EXEC] THEN DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN ENSURES_INIT_TAC "s0" THEN BIGNUM_DIGITIZE_TAC "x_" `bignum_from_memory (x,6) s0` THEN (*** Squaring of the lower half ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC (1--28) (1--28) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (1--28) (1--28) THEN SUBGOAL_THEN `bignum_of_wordlist[x_0; x_1; x_2] EXP 2 = bignum_of_wordlist [mullo_s7; sum_s24; sum_s25; sum_s26; sum_s27; sum_s28]` @@ -843,7 +875,7 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove (*** Three short Montgomery reductions ***) - montred_tac BIGNUM_MONTSQR_P384_EXEC + montred_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X8;X13;X12;X11;X10;X9;X8; X14;X15;X16]` 28 THEN REPLICATE_TAC 2 (FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC (LAND_CONV o RAND_CONV o LAND_CONV o RAND_CONV) @@ -851,9 +883,9 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove DISCH_THEN(ASSUME_TAC o MATCH_MP (REAL_ARITH `a:real = b + c ==> a - c = b`)) THEN - montred_subst_tac BIGNUM_MONTSQR_P384_EXEC + montred_subst_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X9;X8;X13;X12;X11;X10;X9; X14;X15;X16]` 43 THEN - montred_subst_tac BIGNUM_MONTSQR_P384_EXEC + montred_subst_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X10;X9;X8;X13;X12;X11;X10; X14;X15;X16]` 58 THEN ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC) THEN DISCARD_MATCHING_ASSUMPTIONS [`word a = b`] THEN @@ -861,11 +893,11 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove (*** Three stashing stores ***) - ARM_STEPS_TAC BIGNUM_MONTSQR_P384_EXEC [74;75;76] THEN + ARM_STEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC [74;75;76] THEN (*** ADK cross-product ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC ([77;78;79] @ (83--93) @ [99] @ (105--109) @ [115] @ (121--124) @ [130] @ (136--138)) (77--138) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES]) THEN @@ -899,7 +931,7 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove (*** Double the cross-product and add the Montgomerified lower square ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC (139--155) (139--155) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (139--155) (139--155) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; VAL_WORD_BITVAL]) THEN SUBGOAL_THEN @@ -921,7 +953,7 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove (*** Three more Montgomery reductions on that sum ***) - montred_tac BIGNUM_MONTSQR_P384_EXEC + montred_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X8;X13;X12;X11;X10;X9;X8; X2;X3;X4]` 155 THEN FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC (LAND_CONV o RAND_CONV o LAND_CONV o RAND_CONV) @@ -929,10 +961,10 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove DISCH_THEN(ASSUME_TAC o MATCH_MP (REAL_ARITH `a:real = b + c ==> a - c = b`)) THEN - montred_subst_tac BIGNUM_MONTSQR_P384_EXEC + montred_subst_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X9;X8;X13;X12;X11;X10;X9; X2;X3;X4]` 170 THEN - montred_subst_tac BIGNUM_MONTSQR_P384_EXEC + montred_subst_tac BIGNUM_MONTSQR_P384_CORE_EXEC `[X10;X9;X8;X13;X12;X11;X10; X2;X3;X4]` 185 THEN ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC) THEN @@ -941,7 +973,7 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove (*** Montgomery accumulation and addition of the high square ***) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC (201--237) (201--237) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (201--237) (201--237) THEN RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; VAL_WORD_BITVAL]) THEN (*** Main pre-reduced result ***) @@ -1001,7 +1033,7 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove (*** Final comparison ****) - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC (238--247) (238--247) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (238--247) (238--247) THEN SUBGOAL_THEN `sum_s247:int64 = word(bitval(p_384 <= t))` SUBST_ALL_TAC THENL @@ -1019,10 +1051,10 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove (*** Corrective masked subtraction ***) - ARM_STEPS_TAC BIGNUM_MONTSQR_P384_EXEC [248] THEN + ARM_STEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC [248] THEN RULE_ASSUM_TAC(REWRITE_RULE[WORD_RULE `word_sub (word 0) x:int64 = word_neg x`]) THEN - ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_EXEC (249--260) (249--260) THEN + ARM_ACCSTEPS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC (249--260) (249--260) THEN ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN FIRST_X_ASSUM(MATCH_MP_TAC o MATCH_MP (NUMBER_RULE @@ -1055,6 +1087,26 @@ let BIGNUM_AMONTSQR_P384_CORRECT = time prove REWRITE_TAC[BITVAL_CLAUSES; p_384] THEN CONV_TAC WORD_REDUCE_CONV THEN REAL_INTEGER_TAC);; +let BIGNUM_AMONTSQR_P384_CORRECT = time prove( + `!z x a pc. + nonoverlapping (word pc,LENGTH bignum_montsqr_p384_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p384_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,6) s = a) + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p384_core_mc) /\ + ((bignum_from_memory (z,6) s == + inverse_mod p_384 (2 EXP 384) * a EXP 2) (mod p_384))) + (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)] ,, + MAYCHANGE SOME_FLAGS)`, + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_AMONTSQR_P384_CORE_CORRECT + bignum_montsqr_p384_core_mc_def + [BIGNUM_MONTSQR_P384_CORE_EXEC;BIGNUM_MONTSQR_P384_EXEC]);; + let BIGNUM_AMONTSQR_P384_SUBROUTINE_CORRECT = time prove (`!z x a pc returnaddress. nonoverlapping (word pc,0x414) (z,8 * 6) @@ -1070,4 +1122,5 @@ let BIGNUM_AMONTSQR_P384_SUBROUTINE_CORRECT = time prove (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 6)])`, ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTSQR_P384_EXEC - BIGNUM_AMONTSQR_P384_CORRECT);; + (REWRITE_RULE [BIGNUM_MONTSQR_P384_EXEC; + BIGNUM_MONTSQR_P384_CORE_EXEC] BIGNUM_AMONTSQR_P384_CORRECT));; diff --git a/arm/proofs/bignum_montsqr_p384_neon.ml b/arm/proofs/bignum_montsqr_p384_neon.ml new file mode 100644 index 000000000..5c5226e74 --- /dev/null +++ b/arm/proofs/bignum_montsqr_p384_neon.ml @@ -0,0 +1,934 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(****************************************************************************** + The first program equivalence between the 'core' part of source program and + its SIMD-vectorized but not instruction-unscheduled program +******************************************************************************) + +needs "arm/proofs/bignum_montsqr_p384.ml";; +needs "arm/proofs/equiv.ml";; +needs "arm/proofs/neon_helper.ml";; + +(* This is the intermediate program that is equivalent to both + bignum_montsqr_p384 and bignum_montsqr_p384_neon. This is a vectorized + version of bignum_montsqr_p384 but instructions are unscheduled. *) + +let bignum_montsqr_p384_interm1_ops:int list = [ + 0xa9400829; (* ldp x9, x2, [x1] *) + 0x3dc00032; (* ldr q18, [x1] *) + 0x3dc00033; (* ldr q19, [x1] *) + 0xa9411824; (* ldp x4, x6, [x1, #16] *) + 0xa9422825; (* ldp x5, x10, [x1, #32] *) + 0x3dc00835; (* ldr q21, [x1, #32] *) + 0x3dc0083c; (* ldr q28, [x1, #32] *) + 0x9b027d2c; (* mul x12, x9, x2 *) + 0x9b047d21; (* mul x1, x9, x4 *) + 0x9b047c4d; (* mul x13, x2, x4 *) + 0x6f00e5e0; (* movi v0.2d, #0xffffffff *) + 0x4e935a65; (* uzp2 v5.4s, v19.4s, v19.4s *) + 0x0ea12a59; (* xtn v25.2s, v18.2d *) + 0x0ea12a64; (* xtn v4.2s, v19.2d *) + 0x4ea00a77; (* rev64 v23.4s, v19.4s *) + 0x2ea4c334; (* umull v20.2d, v25.2s, v4.2s *) + 0x2ea5c33e; (* umull v30.2d, v25.2s, v5.2s *) + 0x4e925a53; (* uzp2 v19.4s, v18.4s, v18.4s *) + 0x4eb29ef6; (* mul v22.4s, v23.4s, v18.4s *) + 0x6f60169e; (* usra v30.2d, v20.2d, #32 *) + 0x2ea5c272; (* umull v18.2d, v19.2s, v5.2s *) + 0x6ea02ad6; (* uaddlp v22.2d, v22.4s *) + 0x4e201fd4; (* and v20.16b, v30.16b, v0.16b *) + 0x2ea48274; (* umlal v20.2d, v19.2s, v4.2s *) + 0x4f6056d3; (* shl v19.2d, v22.2d, #32 *) + 0x6f6017d2; (* usra v18.2d, v30.2d, #32 *) + 0x2ea48333; (* umlal v19.2d, v25.2s, v4.2s *) + 0x6f601692; (* usra v18.2d, v20.2d, #32 *) + 0x4e083e67; (* mov x7, v19.d[0] *) + 0x4e183e71; (* mov x17, v19.d[1] *) + 0x9b047c90; (* mul x16, x4, x4 *) + 0x9bc27d23; (* umulh x3, x9, x2 *) + 0xab03002f; (* adds x15, x1, x3 *) + 0x9bc47d21; (* umulh x1, x9, x4 *) + 0xba0101ad; (* adcs x13, x13, x1 *) + 0x9bc47c41; (* umulh x1, x2, x4 *) + 0xba1f0028; (* adcs x8, x1, xzr *) + 0x4e083e4b; (* mov x11, v18.d[0] *) + 0x4e183e4e; (* mov x14, v18.d[1] *) + 0x9bc47c81; (* umulh x1, x4, x4 *) + 0xab0c0183; (* adds x3, x12, x12 *) + 0xba0f01ef; (* adcs x15, x15, x15 *) + 0xba0d01ad; (* adcs x13, x13, x13 *) + 0xba08010c; (* adcs x12, x8, x8 *) + 0x9a1f0021; (* adc x1, x1, xzr *) + 0xab03016b; (* adds x11, x11, x3 *) + 0xba0f0223; (* adcs x3, x17, x15 *) + 0xba0d01d1; (* adcs x17, x14, x13 *) + 0xba0c020f; (* adcs x15, x16, x12 *) + 0x9a1f002d; (* adc x13, x1, xzr *) + 0xd3607ce1; (* lsl x1, x7, #32 *) + 0x8b070030; (* add x16, x1, x7 *) + 0xd360fe01; (* lsr x1, x16, #32 *) + 0xeb10002c; (* subs x12, x1, x16 *) + 0xda1f0201; (* sbc x1, x16, xzr *) + 0x93cc802c; (* extr x12, x1, x12, #32 *) + 0xd360fc21; (* lsr x1, x1, #32 *) + 0xab100027; (* adds x7, x1, x16 *) + 0x9a1f03e1; (* adc x1, xzr, xzr *) + 0xeb0c016c; (* subs x12, x11, x12 *) + 0xfa07006b; (* sbcs x11, x3, x7 *) + 0xfa010231; (* sbcs x17, x17, x1 *) + 0xfa1f01ef; (* sbcs x15, x15, xzr *) + 0xfa1f01ad; (* sbcs x13, x13, xzr *) + 0xda1f0203; (* sbc x3, x16, xzr *) + 0xd3607d81; (* lsl x1, x12, #32 *) + 0x8b0c0030; (* add x16, x1, x12 *) + 0xd360fe01; (* lsr x1, x16, #32 *) + 0xeb10002c; (* subs x12, x1, x16 *) + 0xda1f0201; (* sbc x1, x16, xzr *) + 0x93cc802c; (* extr x12, x1, x12, #32 *) + 0xd360fc21; (* lsr x1, x1, #32 *) + 0xab100027; (* adds x7, x1, x16 *) + 0x9a1f03e1; (* adc x1, xzr, xzr *) + 0xeb0c016c; (* subs x12, x11, x12 *) + 0xfa070231; (* sbcs x17, x17, x7 *) + 0xfa0101ef; (* sbcs x15, x15, x1 *) + 0xfa1f01ad; (* sbcs x13, x13, xzr *) + 0xfa1f006b; (* sbcs x11, x3, xzr *) + 0xda1f0203; (* sbc x3, x16, xzr *) + 0xd3607d81; (* lsl x1, x12, #32 *) + 0x8b0c0030; (* add x16, x1, x12 *) + 0xd360fe01; (* lsr x1, x16, #32 *) + 0xeb10002c; (* subs x12, x1, x16 *) + 0xda1f0201; (* sbc x1, x16, xzr *) + 0x93cc8027; (* extr x7, x1, x12, #32 *) + 0xd360fc21; (* lsr x1, x1, #32 *) + 0xab10002c; (* adds x12, x1, x16 *) + 0x9a1f03e1; (* adc x1, xzr, xzr *) + 0xeb070231; (* subs x17, x17, x7 *) + 0xfa0c01ef; (* sbcs x15, x15, x12 *) + 0xfa0101ad; (* sbcs x13, x13, x1 *) + 0xfa1f0167; (* sbcs x7, x11, xzr *) + 0xfa1f006c; (* sbcs x12, x3, xzr *) + 0xda1f0201; (* sbc x1, x16, xzr *) + 0xa9003c11; (* stp x17, x15, [x0] *) + 0xa9011c0d; (* stp x13, x7, [x0, #16] *) + 0xa902040c; (* stp x12, x1, [x0, #32] *) + 0x9b067d2e; (* mul x14, x9, x6 *) + 0x9b057c4f; (* mul x15, x2, x5 *) + 0x9b0a7c8d; (* mul x13, x4, x10 *) + 0x9bc67d27; (* umulh x7, x9, x6 *) + 0x9bc57c4c; (* umulh x12, x2, x5 *) + 0x9bca7c81; (* umulh x1, x4, x10 *) + 0xab0f00ef; (* adds x15, x7, x15 *) + 0xba0d0190; (* adcs x16, x12, x13 *) + 0x9a1f002d; (* adc x13, x1, xzr *) + 0xab0e01eb; (* adds x11, x15, x14 *) + 0xba0f0207; (* adcs x7, x16, x15 *) + 0xba1001ac; (* adcs x12, x13, x16 *) + 0x9a1f01a1; (* adc x1, x13, xzr *) + 0xab0e00f1; (* adds x17, x7, x14 *) + 0xba0f018f; (* adcs x15, x12, x15 *) + 0xba100023; (* adcs x3, x1, x16 *) + 0x9a1f01b0; (* adc x16, x13, xzr *) + 0xeb020121; (* subs x1, x9, x2 *) + 0xda81242d; (* cneg x13, x1, cc // cc = lo, ul, last *) + 0xda9f23e7; (* csetm x7, cc // cc = lo, ul, last *) + 0xeb0600a1; (* subs x1, x5, x6 *) + 0xda812421; (* cneg x1, x1, cc // cc = lo, ul, last *) + 0x9b017dac; (* mul x12, x13, x1 *) + 0x9bc17da1; (* umulh x1, x13, x1 *) + 0xda8720e7; (* cinv x7, x7, cc // cc = lo, ul, last *) + 0xca07018c; (* eor x12, x12, x7 *) + 0xca070021; (* eor x1, x1, x7 *) + 0xb10004ff; (* cmn x7, #0x1 *) + 0xba0c016b; (* adcs x11, x11, x12 *) + 0xba010231; (* adcs x17, x17, x1 *) + 0xba0701ef; (* adcs x15, x15, x7 *) + 0xba070063; (* adcs x3, x3, x7 *) + 0x9a070210; (* adc x16, x16, x7 *) + 0xeb040129; (* subs x9, x9, x4 *) + 0xda89252d; (* cneg x13, x9, cc // cc = lo, ul, last *) + 0xda9f23e7; (* csetm x7, cc // cc = lo, ul, last *) + 0xeb060141; (* subs x1, x10, x6 *) + 0xda812421; (* cneg x1, x1, cc // cc = lo, ul, last *) + 0x9b017dac; (* mul x12, x13, x1 *) + 0x9bc17da1; (* umulh x1, x13, x1 *) + 0xda8720e7; (* cinv x7, x7, cc // cc = lo, ul, last *) + 0xca07018c; (* eor x12, x12, x7 *) + 0xca070021; (* eor x1, x1, x7 *) + 0xb10004ff; (* cmn x7, #0x1 *) + 0xba0c0231; (* adcs x17, x17, x12 *) + 0xba0101ef; (* adcs x15, x15, x1 *) + 0xba07006d; (* adcs x13, x3, x7 *) + 0x9a070207; (* adc x7, x16, x7 *) + 0xeb040042; (* subs x2, x2, x4 *) + 0xda82244c; (* cneg x12, x2, cc // cc = lo, ul, last *) + 0xda9f23e1; (* csetm x1, cc // cc = lo, ul, last *) + 0xeb050142; (* subs x2, x10, x5 *) + 0xda822442; (* cneg x2, x2, cc // cc = lo, ul, last *) + 0x9b027d84; (* mul x4, x12, x2 *) + 0x9bc27d82; (* umulh x2, x12, x2 *) + 0xda812021; (* cinv x1, x1, cc // cc = lo, ul, last *) + 0xca010084; (* eor x4, x4, x1 *) + 0xca010042; (* eor x2, x2, x1 *) + 0xb100043f; (* cmn x1, #0x1 *) + 0xba0401ec; (* adcs x12, x15, x4 *) + 0xba0201a4; (* adcs x4, x13, x2 *) + 0x9a0100e2; (* adc x2, x7, x1 *) + 0xab0e01c1; (* adds x1, x14, x14 *) + 0xba0b0170; (* adcs x16, x11, x11 *) + 0xba110231; (* adcs x17, x17, x17 *) + 0xba0c018f; (* adcs x15, x12, x12 *) + 0xba04008d; (* adcs x13, x4, x4 *) + 0xba020047; (* adcs x7, x2, x2 *) + 0x9a1f03ec; (* adc x12, xzr, xzr *) + 0xa9400804; (* ldp x4, x2, [x0] *) + 0xab040021; (* adds x1, x1, x4 *) + 0xba020210; (* adcs x16, x16, x2 *) + 0xa9410804; (* ldp x4, x2, [x0, #16] *) + 0xba040231; (* adcs x17, x17, x4 *) + 0xba0201ef; (* adcs x15, x15, x2 *) + 0xa9420804; (* ldp x4, x2, [x0, #32] *) + 0xba0401ad; (* adcs x13, x13, x4 *) + 0xba0200e7; (* adcs x7, x7, x2 *) + 0x9a1f018b; (* adc x11, x12, xzr *) + 0xd3607c22; (* lsl x2, x1, #32 *) + 0x8b01004c; (* add x12, x2, x1 *) + 0xd360fd82; (* lsr x2, x12, #32 *) + 0xeb0c0044; (* subs x4, x2, x12 *) + 0xda1f0182; (* sbc x2, x12, xzr *) + 0x93c48044; (* extr x4, x2, x4, #32 *) + 0xd360fc42; (* lsr x2, x2, #32 *) + 0xab0c0041; (* adds x1, x2, x12 *) + 0x9a1f03e2; (* adc x2, xzr, xzr *) + 0xeb040204; (* subs x4, x16, x4 *) + 0xfa010230; (* sbcs x16, x17, x1 *) + 0xfa0201f1; (* sbcs x17, x15, x2 *) + 0xfa1f01af; (* sbcs x15, x13, xzr *) + 0xfa1f00ed; (* sbcs x13, x7, xzr *) + 0xda1f0187; (* sbc x7, x12, xzr *) + 0xd3607c82; (* lsl x2, x4, #32 *) + 0x8b04004c; (* add x12, x2, x4 *) + 0xd360fd82; (* lsr x2, x12, #32 *) + 0xeb0c0044; (* subs x4, x2, x12 *) + 0xda1f0182; (* sbc x2, x12, xzr *) + 0x93c48044; (* extr x4, x2, x4, #32 *) + 0xd360fc42; (* lsr x2, x2, #32 *) + 0xab0c0041; (* adds x1, x2, x12 *) + 0x9a1f03e2; (* adc x2, xzr, xzr *) + 0xeb040204; (* subs x4, x16, x4 *) + 0xfa010230; (* sbcs x16, x17, x1 *) + 0xfa0201f1; (* sbcs x17, x15, x2 *) + 0xfa1f01af; (* sbcs x15, x13, xzr *) + 0xfa1f00ed; (* sbcs x13, x7, xzr *) + 0xda1f0187; (* sbc x7, x12, xzr *) + 0xd3607c82; (* lsl x2, x4, #32 *) + 0x8b04004c; (* add x12, x2, x4 *) + 0xd360fd82; (* lsr x2, x12, #32 *) + 0xeb0c0044; (* subs x4, x2, x12 *) + 0xda1f0182; (* sbc x2, x12, xzr *) + 0x93c48041; (* extr x1, x2, x4, #32 *) + 0xd360fc42; (* lsr x2, x2, #32 *) + 0xab0c0044; (* adds x4, x2, x12 *) + 0x9a1f03e2; (* adc x2, xzr, xzr *) + 0xeb010203; (* subs x3, x16, x1 *) + 0xfa040231; (* sbcs x17, x17, x4 *) + 0xfa0201ef; (* sbcs x15, x15, x2 *) + 0xfa1f01a1; (* sbcs x1, x13, xzr *) + 0xfa1f00e4; (* sbcs x4, x7, xzr *) + 0xda1f0182; (* sbc x2, x12, xzr *) + 0xab01016d; (* adds x13, x11, x1 *) + 0xba1f0087; (* adcs x7, x4, xzr *) + 0xba1f004c; (* adcs x12, x2, xzr *) + 0xba1f03f0; (* adcs x16, xzr, xzr *) + 0x9b067cc2; (* mul x2, x6, x6 *) + 0xab020063; (* adds x3, x3, x2 *) + 0x0ea12b9e; (* xtn v30.2s, v28.2d *) + 0x0f20879a; (* shrn v26.2s, v28.2d, #32 *) + 0x2ebac3da; (* umull v26.2d, v30.2s, v26.2s *) + 0x4f615753; (* shl v19.2d, v26.2d, #33 *) + 0x2ebe83d3; (* umlal v19.2d, v30.2s, v30.2s *) + 0x4e083e61; (* mov x1, v19.d[0] *) + 0x4e183e64; (* mov x4, v19.d[1] *) + 0x9bc67cc2; (* umulh x2, x6, x6 *) + 0xba020231; (* adcs x17, x17, x2 *) + 0x9bc57ca2; (* umulh x2, x5, x5 *) + 0xba0101ef; (* adcs x15, x15, x1 *) + 0xba0201ad; (* adcs x13, x13, x2 *) + 0x9bca7d42; (* umulh x2, x10, x10 *) + 0xba0400e7; (* adcs x7, x7, x4 *) + 0xba02018c; (* adcs x12, x12, x2 *) + 0x9a1f0210; (* adc x16, x16, xzr *) + 0x4e080cdc; (* dup v28.2d, x6 *) + 0x6f00e5e0; (* movi v0.2d, #0xffffffff *) + 0x4e955aa5; (* uzp2 v5.4s, v21.4s, v21.4s *) + 0x0ea12b99; (* xtn v25.2s, v28.2d *) + 0x0ea12aa4; (* xtn v4.2s, v21.2d *) + 0x4ea00ab3; (* rev64 v19.4s, v21.4s *) + 0x2ea4c33e; (* umull v30.2d, v25.2s, v4.2s *) + 0x2ea5c337; (* umull v23.2d, v25.2s, v5.2s *) + 0x4e9c5b94; (* uzp2 v20.4s, v28.4s, v28.4s *) + 0x4ebc9e73; (* mul v19.4s, v19.4s, v28.4s *) + 0x6f6017d7; (* usra v23.2d, v30.2d, #32 *) + 0x2ea5c292; (* umull v18.2d, v20.2s, v5.2s *) + 0x6ea02a73; (* uaddlp v19.2d, v19.4s *) + 0x4e201efe; (* and v30.16b, v23.16b, v0.16b *) + 0x2ea4829e; (* umlal v30.2d, v20.2s, v4.2s *) + 0x4f605673; (* shl v19.2d, v19.2d, #32 *) + 0x6f6016f2; (* usra v18.2d, v23.2d, #32 *) + 0x2ea48333; (* umlal v19.2d, v25.2s, v4.2s *) + 0x6f6017d2; (* usra v18.2d, v30.2d, #32 *) + 0x4e083e66; (* mov x6, v19.d[0] *) + 0x4e183e61; (* mov x1, v19.d[1] *) + 0x9b0a7ca4; (* mul x4, x5, x10 *) + 0x4e083e42; (* mov x2, v18.d[0] *) + 0xab020021; (* adds x1, x1, x2 *) + 0x4e183e42; (* mov x2, v18.d[1] *) + 0xba020084; (* adcs x4, x4, x2 *) + 0x9bca7ca5; (* umulh x5, x5, x10 *) + 0x9a1f00a2; (* adc x2, x5, xzr *) + 0xab0600c5; (* adds x5, x6, x6 *) + 0xba010026; (* adcs x6, x1, x1 *) + 0xba040081; (* adcs x1, x4, x4 *) + 0xba020044; (* adcs x4, x2, x2 *) + 0x9a1f03e2; (* adc x2, xzr, xzr *) + 0xab050231; (* adds x17, x17, x5 *) + 0xba0601ef; (* adcs x15, x15, x6 *) + 0xba0101ad; (* adcs x13, x13, x1 *) + 0xba0400e7; (* adcs x7, x7, x4 *) + 0xba02018c; (* adcs x12, x12, x2 *) + 0x9a1f0202; (* adc x2, x16, xzr *) + 0xb26083e5; (* mov x5, #0xffffffff00000001 // #-4294967295 *) + 0xb2407fe6; (* mov x6, #0xffffffff // #4294967295 *) + 0xd2800021; (* mov x1, #0x1 // #1 *) + 0xab05007f; (* cmn x3, x5 *) + 0xba06023f; (* adcs xzr, x17, x6 *) + 0xba0101ff; (* adcs xzr, x15, x1 *) + 0xba1f01bf; (* adcs xzr, x13, xzr *) + 0xba1f00ff; (* adcs xzr, x7, xzr *) + 0xba1f019f; (* adcs xzr, x12, xzr *) + 0x9a1f0042; (* adc x2, x2, xzr *) + 0xcb0203e4; (* neg x4, x2 *) + 0x8a0400a2; (* and x2, x5, x4 *) + 0xab02006a; (* adds x10, x3, x2 *) + 0x8a0400c2; (* and x2, x6, x4 *) + 0xba020225; (* adcs x5, x17, x2 *) + 0x8a040022; (* and x2, x1, x4 *) + 0xba0201e6; (* adcs x6, x15, x2 *) + 0xba1f01a1; (* adcs x1, x13, xzr *) + 0xba1f00e4; (* adcs x4, x7, xzr *) + 0x9a1f0182; (* adc x2, x12, xzr *) + 0xa900140a; (* stp x10, x5, [x0] *) + 0xa9010406; (* stp x6, x1, [x0, #16] *) + 0xa9020804; (* stp x4, x2, [x0, #32] *) + 0xd65f03c0; (* ret *) +];; + +let bignum_montsqr_p384_interm1_mc = + let charlist = List.concat_map + (fun op32 -> + [Char.chr (Int.logand op32 255); + Char.chr (Int.logand (Int.shift_right op32 8) 255); + Char.chr (Int.logand (Int.shift_right op32 16) 255); + Char.chr (Int.logand (Int.shift_right op32 24) 255)]) + bignum_montsqr_p384_interm1_ops in + let byte_list = Bytes.init (List.length charlist) (fun i -> List.nth charlist i) in + define_word_list "bignum_montsqr_p384_interm1_mc" (term_of_bytes byte_list);; + +let BIGNUM_MONTSQR_P384_INTERM1_EXEC = + ARM_MK_EXEC_RULE bignum_montsqr_p384_interm1_mc;; + +let bignum_montsqr_p384_interm1_core_mc_def, + bignum_montsqr_p384_interm1_core_mc, + BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC = + mk_sublist_of_mc "bignum_montsqr_p384_interm1_core_mc" + bignum_montsqr_p384_interm1_mc + (`0`,`LENGTH bignum_montsqr_p384_interm1_mc - 4`) + BIGNUM_MONTSQR_P384_INTERM1_EXEC;; + +let equiv_input_states = new_definition + `!s1 s1' x z. + (equiv_input_states:(armstate#armstate)->int64->int64->bool) (s1,s1') x z <=> + (?a. + C_ARGUMENTS [z; x] s1 /\ + C_ARGUMENTS [z; x] s1' /\ + bignum_from_memory (x,6) s1 = a /\ + bignum_from_memory (x,6) s1' = a)`;; + +let equiv_output_states = new_definition + `!s1 s1' z. + (equiv_output_states:(armstate#armstate)->int64->bool) (s1,s1') z <=> + (?a. + bignum_from_memory (z,6) s1 = a /\ + bignum_from_memory (z,6) s1' = a)`;; + +(* This diff is generated by tools/diff.py. *) +let actions = [ + ("equal", 0, 1, 0, 1); + ("insert", 1, 1, 1, 3); + ("equal", 1, 3, 3, 5); + ("insert", 3, 3, 5, 7); + ("equal", 3, 6, 7, 10); + ("replace", 6, 8, 10, 30); + ("equal", 8, 15, 30, 37); + ("replace", 15, 17, 37, 39); + ("equal", 17, 206, 39, 228); + ("replace", 206, 208, 228, 235); + ("equal", 208, 217, 235, 244); + ("replace", 217, 219, 244, 265); + ("equal", 219, 220, 265, 266); + ("replace", 220, 221, 266, 267); + ("equal", 221, 222, 267, 268); + ("replace", 222, 223, 268, 269); + ("equal", 223, 260, 269, 306); +];; + +let equiv_goal1 = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 6)) + [(word pc:int64,LENGTH bignum_montsqr_p384_core_mc); + (word pc2:int64,LENGTH bignum_montsqr_p384_interm1_core_mc)]` + equiv_input_states + equiv_output_states + bignum_montsqr_p384_core_mc 0 + `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)] ,, + MAYCHANGE SOME_FLAGS` + bignum_montsqr_p384_interm1_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6)]`;; + +let _org_extra_word_CONV = !extra_word_CONV;; +extra_word_CONV := + [GEN_REWRITE_CONV I [WORD_BITMANIP_SIMP_LEMMAS; WORD_MUL64_LO; WORD_MUL64_HI; + WORD_SQR64_LO2]] + @ (!extra_word_CONV);; + +let BIGNUM_MONTSQR_P384_CORE_EQUIV1 = time prove(equiv_goal1, + + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; + ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTSQR_P384_CORE_EXEC; + BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC] THEN + REPEAT STRIP_TAC THEN + (** Initialize **) + EQUIV_INITIATE_TAC equiv_input_states THEN + REPEAT (FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN + (* necessary to run ldr qs *) + COMBINE_READ_BYTES64_PAIRS_TAC THEN + + (* Start *) + EQUIV_STEPS_TAC actions + BIGNUM_MONTSQR_P384_CORE_EXEC + BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC THEN + + REPEAT_N 2 ENSURES_FINAL_STATE'_TAC THEN + (* Prove remaining clauses from the postcondition *) + ASM_REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Outputs **) + ASM_REWRITE_TAC[equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; + BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,6) state`; + C_ARGUMENTS] THEN + REPEAT (HINT_EXISTS_REFL_TAC THEN ASM_REWRITE_TAC[]); + + (** SUBGOAL 2. Maychange left **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0':armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC; + + (** SUBGOAL 3. Maychange right **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0:armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC + ]);; + +extra_word_CONV := _org_extra_word_CONV;; + + + +(****************************************************************************** + The second program equivalence between the core part of intermediate + program and fully optimized program +******************************************************************************) + +let bignum_montsqr_p384_neon_mc = + define_from_elf "bignum_montsqr_p384_neon_mc" + "arm/p384/bignum_montsqr_p384_neon.o";; + +let BIGNUM_MONTSQR_P384_NEON_EXEC = + ARM_MK_EXEC_RULE bignum_montsqr_p384_neon_mc;; + +let bignum_montsqr_p384_neon_core_mc_def, + bignum_montsqr_p384_neon_core_mc, + BIGNUM_MONTSQR_P384_NEON_CORE_EXEC = + mk_sublist_of_mc "bignum_montsqr_p384_neon_core_mc" + bignum_montsqr_p384_neon_mc + (`0`,`LENGTH bignum_montsqr_p384_neon_mc - 4`) + BIGNUM_MONTSQR_P384_NEON_EXEC;; + + +let equiv_goal2 = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 6)) + [(word pc:int64,LENGTH bignum_montsqr_p384_interm1_core_mc); + (word pc2:int64,LENGTH bignum_montsqr_p384_neon_core_mc)]` + equiv_input_states + equiv_output_states + bignum_montsqr_p384_interm1_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6)]` + bignum_montsqr_p384_neon_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6)]`;; + +(* Line numbers from the fully optimized prog. to the intermediate prog. + The script that prints this map is being privately maintained by aqjune-aws. + This map can be also printed from the instruction map of SLOTHY's output, but + aqjune-aws does not have the converter. *) + +let inst_map = [ +3;1;2;4;15;12;32;14;19;7;13;6;18;5;34;22;16;9;17;230;229;25;21;10;27;231;20;36;11;247;33;23;35;26;245;37;29;24;8;30;232;248;250;51;253;233;28;41;40;52;254;249;42;53;43;252;44;251;38;256;45;257;246;54;255;55;46;31;260;39;47;56;48;258;49;57;259;261;50;58;262;59;60;61;66;62;67;263;63;68;64;65;69;104;70;71;72;73;74;75;76;81;103;77;82;78;83;79;80;84;85;86;87;88;89;90;102;91;92;93;100;94;96;95;101;116;97;118;117;98;99;105;106;107;119;120;123;121;122;124;108;109;110;111;112;113;114;125;115;132;134;133;147;148;149;135;139;136;126;127;137;128;129;138;130;131;150;154;151;141;142;140;152;143;144;145;171;153;146;157;155;158;168;156;159;174;160;161;271;162;163;164;165;166;167;169;170;266;172;173;238;178;179;175;265;176;180;177;181;241;182;183;184;185;186;187;188;193;189;194;190;195;191;192;196;197;199;198;200;201;202;208;203;204;209;269;210;205;267;206;207;211;212;268;270;214;213;272;215;227;216;217;218;264;219;220;221;222;273;274;234;275;236;276;277;223;224;225;226;228;235;237;285;239;240;242;243;244;278;279;280;281;284;282;286;283;287;288;289;290;291;292;293;294;295;296;297;298;299;300;304;301;302;305;303;306];; + + +(* (state number, (equation, fresh var)) *) +let state_to_abbrevs: (int * thm) list ref = ref [];; + +let BIGNUM_MONTSQR_P384_CORE_EQUIV2 = time prove( + equiv_goal2, + + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;SOME_FLAGS; + ALLPAIRS;ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC; + BIGNUM_MONTSQR_P384_NEON_CORE_EXEC] THEN + REPEAT STRIP_TAC THEN + (** Initialize **) + EQUIV_INITIATE_TAC equiv_input_states THEN + REPEAT (FIRST_X_ASSUM BIGNUM_EXPAND_AND_DIGITIZE_TAC) THEN + ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN + (* necessary to run ldr qs *) + COMBINE_READ_BYTES64_PAIRS_TAC THEN + + (* Left *) + ARM_STEPS'_AND_ABBREV_TAC BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC + (1--(List.length inst_map)) state_to_abbrevs THEN + + (* Right *) + ARM_STEPS'_AND_REWRITE_TAC BIGNUM_MONTSQR_P384_NEON_CORE_EXEC + (1--(List.length inst_map)) inst_map state_to_abbrevs THEN + + REPEAT_N 2 ENSURES_FINAL_STATE'_TAC THEN + (* Prove remaining clauses from the postcondition *) + ASM_REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Outputs **) + ASM_REWRITE_TAC[equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; + BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,6) state`; + C_ARGUMENTS] THEN + REPEAT (HINT_EXISTS_REFL_TAC THEN ASM_REWRITE_TAC[]); + + (** SUBGOAL 2. Maychange left **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0':armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC; + + (** SUBGOAL 3. Maychange right **) + DISCARD_ASSUMPTIONS_TAC (fun th -> free_in `s0:armstate` (concl th)) THEN + MONOTONE_MAYCHANGE_TAC + ]);; + + +(****************************************************************************** + Use transitivity of two program equivalences to prove end-to-end + correctness +******************************************************************************) + +let equiv_goal = mk_equiv_statement + `ALL (nonoverlapping (z:int64,8 * 6)) + [(word pc:int64,LENGTH bignum_montsqr_p384_core_mc); + (word pc2:int64,LENGTH bignum_montsqr_p384_neon_core_mc)]` + equiv_input_states + equiv_output_states + bignum_montsqr_p384_core_mc 0 + `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17] ,, + MAYCHANGE [memory :> bytes(z,8 * 6)] ,, + MAYCHANGE SOME_FLAGS` + bignum_montsqr_p384_neon_core_mc 0 + `MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6)]`;; + +let equiv_output_states_TRANS = prove( + `!s s2 s' + z. equiv_output_states (s,s') z /\ equiv_output_states (s',s2) z + ==> equiv_output_states (s,s2) z`, + MESON_TAC[equiv_output_states]);; + +let BIGNUM_MONTSQR_P384_CORE_EQUIV = time prove(equiv_goal, + + REPEAT STRIP_TAC THEN + SUBGOAL_THEN + `?pc3. + ALL (nonoverlapping (z:int64,8 * 6)) + [(word pc:int64,LENGTH bignum_montsqr_p384_core_mc); + (word pc3:int64,LENGTH bignum_montsqr_p384_interm1_core_mc)] /\ + ALL (nonoverlapping (z:int64,8 * 6)) + [(word pc3:int64,LENGTH bignum_montsqr_p384_interm1_core_mc); + (word pc2:int64,LENGTH bignum_montsqr_p384_neon_core_mc)] /\ + // Input buffers and the intermediate program don't alias + ALL (nonoverlapping + (word pc3:int64, LENGTH bignum_montsqr_p384_interm1_core_mc)) + [x,8 * 6] /\ + 4 divides val (word pc3:int64)` + MP_TAC THENL [ + FIRST_X_ASSUM MP_TAC THEN + ASM_REWRITE_TAC + [ALL;NONOVERLAPPING_CLAUSES; + BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC; + BIGNUM_MONTSQR_P384_NEON_CORE_EXEC; + GSYM CONJ_ASSOC] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN POP_ASSUM_LIST (K ALL_TAC) THEN + FIND_HOLE_TAC; + + ALL_TAC + ] THEN + DISCH_THEN (CHOOSE_THEN (DESTRUCT_TAC "h1 h2 h3 h4")) THEN + + + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTSQR_P384_CORE_EQUIV1 th))) THEN + FIRST_X_ASSUM (fun th -> ASSUME_TAC (SPEC_ALL (MATCH_MP BIGNUM_MONTSQR_P384_CORE_EQUIV2 th))) THEN + FIRST_X_ASSUM (fun c1 -> + FIRST_X_ASSUM (fun c2 -> + MP_TAC (REWRITE_RULE [] (MATCH_MP ENSURES2_CONJ2 (CONJ c1 c2))) + )) THEN + + (* break 'ALL nonoverlapping' in assumptions *) + RULE_ASSUM_TAC (REWRITE_RULE[ + ALLPAIRS;ALL; + BIGNUM_MONTSQR_P384_CORE_EXEC; + BIGNUM_MONTSQR_P384_NEON_CORE_EXEC; + BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC; + NONOVERLAPPING_CLAUSES]) THEN + REPEAT SPLIT_FIRST_CONJ_ASSUM_TAC THEN + + MATCH_MP_TAC ENSURES2_WEAKEN THEN + REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + REPEAT STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[TAUT `(p /\ q /\ r) /\ p /\ q /\ r' <=> p /\ q /\ r /\ r'`] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc3,LENGTH bignum_montsqr_p384_interm1_core_mc)) + bignum_montsqr_p384_interm1_core_mc + (write PC (word pc3) s')` THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC THENL [ + UNDISCH_TAC `equiv_input_states (s,s') x z` THEN + REWRITE_TAC[equiv_input_states;C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES;BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + MAP_EVERY EXISTS_TAC [`a:num`] THEN + REWRITE_TAC[] THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC; + + UNDISCH_TAC `equiv_input_states (s,s') x z` THEN + REWRITE_TAC[equiv_input_states;C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES;BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + MAP_EVERY EXISTS_TAC [`a:num`] THEN + REWRITE_TAC[] THEN + PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P384_INTERM1_CORE_EXEC + ]; + + REPEAT GEN_TAC THEN STRIP_TAC THEN + ASM_REWRITE_TAC[] THEN ASM_MESON_TAC[equiv_output_states_TRANS]; + + SUBSUMED_MAYCHANGE_TAC + ]);; + + +(****************************************************************************** + Inducing BIGNUM_MONTSQR_P384_NEON_SUBROUTINE_CORRECT + from BIGNUM_MONTSQR_P384_CORE_CORRECT +******************************************************************************) + +(* Prove BIGNUM_MONTSQR_P384_CORE_CORRECT_N first *) + +let event_n_at_pc_goal = mk_eventually_n_at_pc_statement + `nonoverlapping + (word pc:int64, + LENGTH (APPEND bignum_montsqr_p384_core_mc barrier_inst_bytes)) + (z:int64,8 * 6)` + [`z:int64`;`x:int64`] (*pc_mc_ofs*)0 + bignum_montsqr_p384_core_mc (*pc_ofs*)0 + `\s0. C_ARGUMENTS [z;x] s0`;; + +let BIGNUM_MONTSQR_P384_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, + + REWRITE_TAC[LENGTH_APPEND;BIGNUM_MONTSQR_P384_CORE_EXEC;BARRIER_INST_BYTES_LENGTH] THEN + REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN + SUBGOAL_THEN `4 divides (LENGTH bignum_montsqr_p384_core_mc)` + (fun th -> REWRITE_TAC[MATCH_MP aligned_bytes_loaded_append th; + BIGNUM_MONTSQR_P384_CORE_EXEC]) THENL [ + REWRITE_TAC[BIGNUM_MONTSQR_P384_CORE_EXEC] THEN CONV_TAC NUM_DIVIDES_CONV; + ALL_TAC] THEN + REPEAT GEN_TAC THEN STRIP_TAC THEN + (* now start..! *) + X_GEN_TAC `s0:armstate` THEN GEN_TAC THEN STRIP_TAC THEN + (* eventually ==> eventually_n *) + PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC BIGNUM_MONTSQR_P384_CORE_EXEC);; + + +let BIGNUM_MONTSQR_P384_CORE_CORRECT_N = + prove_correct_n + BIGNUM_MONTSQR_P384_EXEC + BIGNUM_MONTSQR_P384_CORE_EXEC + BIGNUM_MONTSQR_P384_CORE_CORRECT + BIGNUM_MONTSQR_P384_EVENTUALLY_N_AT_PC;; + +(* This theorem is a copy of BIGNUM_MONTSQR_P384_CORE_CORRECT, but with + - 'pc' replaced with 'pc2' + - LENGTH of bignum_montsqr_p384_core_mc with + bignum_montsqr_p384_neon_core_m + - The MAYCHANGE set replaced with the Neon version's one *) + +let BIGNUM_MONTSQR_P384_NEON_CORE_CORRECT = prove( + `!z x a pc2. + nonoverlapping (word pc2,LENGTH bignum_montsqr_p384_neon_core_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc2) bignum_montsqr_p384_neon_core_mc /\ + read PC s = word pc2 /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,6) s = a) + (\s. read PC s = word (pc2 + LENGTH bignum_montsqr_p384_neon_core_mc) /\ + (a EXP 2 <= 2 EXP 384 * p_384 + ==> bignum_from_memory (z,6) s = + (inverse_mod p_384 (2 EXP 384) * a EXP 2) MOD p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6)])`, + + REPEAT GEN_TAC THEN + (* Prepare pc for the original program. This is going to be used + for preparing an initial state by 'overwriting' bignum_montsqr_p384_mc + at pc. *) + SUBGOAL_THEN + `?pc. + nonoverlapping (word pc, + LENGTH (APPEND bignum_montsqr_p384_core_mc barrier_inst_bytes)) (z:int64,8 * 6) /\ + nonoverlapping (word pc, + LENGTH (APPEND bignum_montsqr_p384_core_mc barrier_inst_bytes)) (x:int64,8 * 6) /\ + 4 divides val (word pc:int64)` MP_TAC THENL [ + REWRITE_TAC[BIGNUM_MONTSQR_P384_CORE_EXEC;NONOVERLAPPING_CLAUSES;ALL; + LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH] THEN + FIND_HOLE_TAC; + + (** SUBGOAL 2 **) + ALL_TAC + ] THEN + + REPEAT_N 2 STRIP_TAC THEN + + VCGEN_EQUIV_TAC BIGNUM_MONTSQR_P384_CORE_EQUIV BIGNUM_MONTSQR_P384_CORE_CORRECT_N + [BIGNUM_MONTSQR_P384_CORE_EXEC;BIGNUM_MONTSQR_P384_NEON_CORE_EXEC] THEN + + (* unfold definitions that may block tactics *) + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTSQR_P384_EXEC; + BIGNUM_MONTSQR_P384_NEON_EXEC]) THEN + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Precond **) + X_GEN_TAC `s2:armstate` THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `4 divides val (word pc2:int64)` ASSUME_TAC THENL + [ FIRST_ASSUM (fun th -> + MP_TAC th THEN REWRITE_TAC[DIVIDES_4_VAL_WORD_64;aligned_bytes_loaded_word] + THEN METIS_TAC[]) THEN NO_TAC; ALL_TAC ] THEN + ASM_REWRITE_TAC[equiv_input_states] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc,LENGTH (APPEND bignum_montsqr_p384_core_mc barrier_inst_bytes))) + (APPEND bignum_montsqr_p384_core_mc barrier_inst_bytes) + (write PC (word pc) s2)` THEN + (* Expand variables appearing in the equiv relation *) + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC) THEN + (* Now has only one subgoal: the equivalence! *) + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + MAP_EVERY EXISTS_TAC [`a:num`] THEN + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC); + + (** SUBGOAL 2. Postcond **) + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES; + BIGNUM_MONTSQR_P384_NEON_CORE_EXEC]; + + (** SUBGOAL 3. Frame **) + MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] + ]);; + +let BIGNUM_MONTSQR_P384_NEON_CORRECT = time prove( + `!z x a pc. + nonoverlapping (word pc,LENGTH bignum_montsqr_p384_neon_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p384_neon_mc /\ + read PC s = word (pc) /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,6) s = a) + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p384_neon_core_mc) /\ + (a EXP 2 <= 2 EXP 384 * p_384 + ==> bignum_from_memory (z,6) s = + (inverse_mod p_384 (2 EXP 384) * a EXP 2) MOD p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6)])`, + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_MONTSQR_P384_NEON_CORE_CORRECT + bignum_montsqr_p384_neon_core_mc_def + [BIGNUM_MONTSQR_P384_NEON_CORE_EXEC;BIGNUM_MONTSQR_P384_NEON_EXEC]);; + +let BIGNUM_MONTSQR_P384_NEON_SUBROUTINE_CORRECT = time prove + (`!z x a pc returnaddress. + nonoverlapping (word pc,LENGTH bignum_montsqr_p384_neon_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p384_neon_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,6) s = a) + (\s. read PC s = returnaddress /\ + (a EXP 2 <= 2 EXP 384 * p_384 + ==> bignum_from_memory (z,6) s = + (inverse_mod p_384 (2 EXP 384) * a EXP 2) MOD p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6)])`, + REWRITE_TAC[BIGNUM_MONTSQR_P384_NEON_EXEC] THEN + ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTSQR_P384_NEON_EXEC + (REWRITE_RULE [BIGNUM_MONTSQR_P384_NEON_EXEC; + BIGNUM_MONTSQR_P384_NEON_CORE_EXEC] + BIGNUM_MONTSQR_P384_NEON_CORRECT));; + +(****************************************************************************** + Inducing BIGNUM_AMONTSQR_P384_NEON_SUBROUTINE_CORRECT + from BIGNUM_AMONTSQR_P384_CORE_CORRECT +******************************************************************************) + +let BIGNUM_AMONTSQR_P384_CORE_CORRECT_N = + prove_correct_n + BIGNUM_MONTSQR_P384_EXEC + BIGNUM_MONTSQR_P384_CORE_EXEC + BIGNUM_AMONTSQR_P384_CORE_CORRECT + BIGNUM_MONTSQR_P384_EVENTUALLY_N_AT_PC;; + +(* This theorem is a copy of BIGNUM_MONTSQR_P384_CORE_CORRECT, but with + - 'pc' replaced with 'pc2' + - LENGTH of bignum_montsqr_p384_core_mc with + bignum_montsqr_p384_neon_core_m + - The MAYCHANGE set replaced with the Neon version's one *) + +let BIGNUM_AMONTSQR_P384_NEON_CORE_CORRECT = prove( + `!z x a pc2. + nonoverlapping (word pc2,LENGTH bignum_montsqr_p384_neon_core_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc2) bignum_montsqr_p384_neon_core_mc /\ + read PC s = word pc2 /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,6) s = a) + (\s. read PC s = word (pc2 + LENGTH bignum_montsqr_p384_neon_core_mc) /\ + (bignum_from_memory (z,6) s == + inverse_mod p_384 (2 EXP 384) * a EXP 2) + (mod p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6)])`, + + REPEAT GEN_TAC THEN + (* Prepare pc for the original program. This is going to be used + for preparing an initial state by 'overwriting' bignum_montsqr_p384_mc + at pc. *) + SUBGOAL_THEN + `?pc. + nonoverlapping (word pc, + LENGTH (APPEND bignum_montsqr_p384_core_mc barrier_inst_bytes)) (z:int64,8 * 6) /\ + nonoverlapping (word pc, + LENGTH (APPEND bignum_montsqr_p384_core_mc barrier_inst_bytes)) (x:int64,8 * 6) /\ + 4 divides val (word pc:int64)` MP_TAC THENL [ + REWRITE_TAC[BIGNUM_MONTSQR_P384_CORE_EXEC;NONOVERLAPPING_CLAUSES;ALL; + LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH] THEN + FIND_HOLE_TAC; + + (** SUBGOAL 2 **) + ALL_TAC + ] THEN + + REPEAT_N 2 STRIP_TAC THEN + + VCGEN_EQUIV_TAC BIGNUM_MONTSQR_P384_CORE_EQUIV BIGNUM_AMONTSQR_P384_CORE_CORRECT_N + [BIGNUM_MONTSQR_P384_CORE_EXEC;BIGNUM_MONTSQR_P384_NEON_CORE_EXEC] THEN + + (* unfold definitions that may block tactics *) + RULE_ASSUM_TAC (REWRITE_RULE[NONOVERLAPPING_CLAUSES;BIGNUM_MONTSQR_P384_EXEC; + BIGNUM_MONTSQR_P384_NEON_EXEC]) THEN + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + REPEAT CONJ_TAC THENL [ + (** SUBGOAL 1. Precond **) + X_GEN_TAC `s2:armstate` THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `4 divides val (word pc2:int64)` ASSUME_TAC THENL + [ FIRST_ASSUM (fun th -> + MP_TAC th THEN REWRITE_TAC[DIVIDES_4_VAL_WORD_64;aligned_bytes_loaded_word] + THEN METIS_TAC[]) THEN NO_TAC; ALL_TAC ] THEN + ASM_REWRITE_TAC[equiv_input_states] THEN + EXISTS_TAC + `write (memory :> bytelist + (word pc,LENGTH (APPEND bignum_montsqr_p384_core_mc barrier_inst_bytes))) + (APPEND bignum_montsqr_p384_core_mc barrier_inst_bytes) + (write PC (word pc) s2)` THEN + (* Expand variables appearing in the equiv relation *) + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC) THEN + (* Now has only one subgoal: the equivalence! *) + REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN + MAP_EVERY EXISTS_TAC [`a:num`] THEN + REPEAT CONJ_TAC THEN + TRY (PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MONTSQR_P384_CORE_EXEC); + + (** SUBGOAL 2. Postcond **) + MESON_TAC[equiv_output_states;BIGNUM_FROM_MEMORY_BYTES; + BIGNUM_MONTSQR_P384_NEON_CORE_EXEC]; + + (** SUBGOAL 3. Frame **) + MESON_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI] + ]);; + +let BIGNUM_AMONTSQR_P384_NEON_CORRECT = time prove( + `!z x a pc. + nonoverlapping (word pc,LENGTH bignum_montsqr_p384_neon_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p384_neon_mc /\ + read PC s = word (pc) /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,6) s = a) + (\s. read PC s = word (pc + LENGTH bignum_montsqr_p384_neon_core_mc) /\ + (bignum_from_memory (z,6) s == + inverse_mod p_384 (2 EXP 384) * a EXP 2) + (mod p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6)])`, + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_AMONTSQR_P384_NEON_CORE_CORRECT + bignum_montsqr_p384_neon_core_mc_def + [BIGNUM_MONTSQR_P384_NEON_CORE_EXEC;BIGNUM_MONTSQR_P384_NEON_EXEC]);; + +let BIGNUM_AMONTSQR_P384_NEON_SUBROUTINE_CORRECT = time prove + (`!z x a pc returnaddress. + nonoverlapping (word pc,LENGTH bignum_montsqr_p384_neon_mc) (z,8 * 6) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_montsqr_p384_neon_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,6) s = a) + (\s. read PC s = returnaddress /\ + (bignum_from_memory (z,6) s == + inverse_mod p_384 (2 EXP 384) * a EXP 2) + (mod p_384)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(z,8 * 6)])`, + REWRITE_TAC[BIGNUM_MONTSQR_P384_NEON_EXEC] THEN + ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MONTSQR_P384_NEON_EXEC + (REWRITE_RULE [BIGNUM_MONTSQR_P384_NEON_EXEC; + BIGNUM_MONTSQR_P384_NEON_CORE_EXEC] + BIGNUM_AMONTSQR_P384_NEON_CORRECT));; + diff --git a/arm/proofs/bignum_mul_8_16.ml b/arm/proofs/bignum_mul_8_16.ml index 0e0f95c2a..1cdeef50d 100644 --- a/arm/proofs/bignum_mul_8_16.ml +++ b/arm/proofs/bignum_mul_8_16.ml @@ -546,14 +546,14 @@ let ADK_48_TAC = let BIGNUM_MUL_8_16_CORE_CORRECT = prove (`!z x y a b pc. ALL (nonoverlapping (z,8 * 16)) - [(word pc,0x740); (x,8 * 8); (y,8 * 8)] + [(word pc,LENGTH bignum_mul_8_16_core_mc); (x,8 * 8); (y,8 * 8)] ==> ensures arm - (\s. aligned_bytes_loaded s (word (pc + 0xc)) bignum_mul_8_16_core_mc /\ - read PC s = word(pc + 0xc) /\ + (\s. aligned_bytes_loaded s (word pc) bignum_mul_8_16_core_mc /\ + read PC s = word(pc) /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,8) s = a /\ bignum_from_memory (y,8) s = b) - (\s. read PC s = word (pc + 0x730) /\ + (\s. read PC s = word (pc + LENGTH bignum_mul_8_16_core_mc) /\ bignum_from_memory (z,16) s = a * b) (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; X13; X14; X15; X16; @@ -562,13 +562,9 @@ let BIGNUM_MUL_8_16_CORE_CORRECT = prove MAYCHANGE SOME_FLAGS)`, MAP_EVERY X_GEN_TAC [`z:int64`; `x:int64`; `y:int64`; `a:num`; `b:num`; `pc:num`] THEN - ABBREV_TAC `pc' = pc + 0xc` THEN - SUBGOAL_THEN `pc+1840=pc'+1828` SUBST_ALL_TAC THENL [ASM_ARITH_TAC;ALL_TAC] THEN - REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; BIGNUM_MUL_8_16_CORE_EXEC] THEN REWRITE_TAC[ALL; NONOVERLAPPING_CLAUSES] THEN DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN - SUBGOAL_THEN `nonoverlapping_modulo (2 EXP 64) (val (z:int64),8 * 16) (pc',1844)` - ASSUME_TAC THENL [EXPAND_TAC "pc'" THEN NONOVERLAPPING_TAC; ALL_TAC] THEN ENSURES_INIT_TAC "s0" THEN BIGNUM_DIGITIZE_TAC "x_" `bignum_from_memory (x,8) s0` THEN BIGNUM_DIGITIZE_TAC "y_" `bignum_from_memory (y,8) s0` THEN @@ -785,31 +781,23 @@ let BIGNUM_MUL_8_16_CORE_CORRECT = prove let BIGNUM_MUL_8_16_CORRECT = prove( `!z x y a b pc. ALL (nonoverlapping (z,8 * 16)) - [(word pc,0x740); (x,8 * 8); (y,8 * 8)] + [(word pc,LENGTH bignum_mul_8_16_mc); (x,8 * 8); (y,8 * 8)] ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_mul_8_16_mc /\ read PC s = word(pc + 0xc) /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,8) s = a /\ bignum_from_memory (y,8) s = b) - (\s. read PC s = word (pc + 0x730) /\ + (\s. read PC s = word (pc + 0xc + LENGTH bignum_mul_8_16_core_mc) /\ bignum_from_memory (z,16) s = a * b) (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; X13; X14; X15; X16; X17; X19; X20; X21; X22; X23; X24] ,, MAYCHANGE [memory :> bytes(z,8 * 16)] ,, MAYCHANGE SOME_FLAGS)`, - REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_MUL_8_16_CORE_CORRECT th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - ASM_REWRITE_TAC[] THEN - REWRITE_TAC[bignum_mul_8_16_core_mc_def;BIGNUM_MUL_8_16_EXEC; - WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - MATCH_MP_TAC ALIGNED_BYTES_LOADED_SUB_LIST THEN - ASM_REWRITE_TAC[] THEN - CONV_TAC NUM_DIVIDES_CONV);; + ARM_SUB_LIST_OF_MC_TAC BIGNUM_MUL_8_16_CORE_CORRECT + bignum_mul_8_16_core_mc_def + [BIGNUM_MUL_8_16_EXEC;BIGNUM_MUL_8_16_CORE_EXEC]);; let BIGNUM_MUL_8_16_SUBROUTINE_CORRECT = prove (`!z x y a b pc stackpointer returnaddress. @@ -817,7 +805,7 @@ let BIGNUM_MUL_8_16_SUBROUTINE_CORRECT = prove nonoverlapping (z,8 * 16) (word_sub stackpointer (word 48),48) /\ ALLPAIRS nonoverlapping [(z,8 * 16); (word_sub stackpointer (word 48),48)] - [(word pc,0x740); (x,8 * 8); (y,8 * 8)] + [(word pc,LENGTH bignum_mul_8_16_mc); (x,8 * 8); (y,8 * 8)] ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_mul_8_16_mc /\ read PC s = word pc /\ @@ -831,6 +819,10 @@ let BIGNUM_MUL_8_16_SUBROUTINE_CORRECT = prove (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, MAYCHANGE [memory :> bytes(z,8 * 16); memory :> bytes(word_sub stackpointer (word 48),48)])`, + REWRITE_TAC[BIGNUM_MUL_8_16_EXEC] THEN ARM_ADD_RETURN_STACK_TAC - BIGNUM_MUL_8_16_EXEC BIGNUM_MUL_8_16_CORRECT + BIGNUM_MUL_8_16_EXEC + ((CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV) o + REWRITE_RULE [BIGNUM_MUL_8_16_EXEC;BIGNUM_MUL_8_16_CORE_EXEC]) + BIGNUM_MUL_8_16_CORRECT) `[X19;X20;X21;X22;X23;X24]` 48);; diff --git a/arm/proofs/bignum_mul_8_16_neon.ml b/arm/proofs/bignum_mul_8_16_neon.ml index ac0f57c54..71ec2caa6 100644 --- a/arm/proofs/bignum_mul_8_16_neon.ml +++ b/arm/proofs/bignum_mul_8_16_neon.ml @@ -561,24 +561,24 @@ let bignum_mul_8_16_equiv_output_states = new_definition let equiv_goal = mk_equiv_statement `ALL (nonoverlapping (z,8 * 16)) [ - (word pc:int64,LENGTH bignum_mul_8_16_mc); - (word pc2:int64,LENGTH bignum_mul_8_16_neon_mc); + (word pc:int64,LENGTH bignum_mul_8_16_core_mc); + (word pc2:int64,LENGTH bignum_mul_8_16_neon_core_mc); (x,8 * 8); (y,8 * 8)]` bignum_mul_8_16_equiv_input_states bignum_mul_8_16_equiv_output_states - bignum_mul_8_16_core_mc 12 + bignum_mul_8_16_core_mc 0 `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; - X9; X10; X11; X12; X13; X14; X15; X16; - X17; X19; X20; X21; X22; X23; X24] ,, - MAYCHANGE [memory :> bytes(z,8 * 16)] ,, - MAYCHANGE SOME_FLAGS` - bignum_mul_8_16_neon_core_mc 12 + X9; X10; X11; X12; X13; X14; X15; X16; + X17; X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [memory :> bytes(z,8 * 16)] ,, + MAYCHANGE SOME_FLAGS` + bignum_mul_8_16_neon_core_mc 0 `MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; - X9; X10; X11; X12; X13; X14; X15; X16; - X17; X19; X20; X21; X22; X23; X24] ,, - MAYCHANGE [Q0; Q1; Q2; Q3; Q4; Q5] ,, - MAYCHANGE [memory :> bytes(z,8 * 16)] ,, - MAYCHANGE SOME_FLAGS`;; + X9; X10; X11; X12; X13; X14; X15; X16; + X17; X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [Q0; Q1; Q2; Q3; Q4; Q5] ,, + MAYCHANGE [memory :> bytes(z,8 * 16)] ,, + MAYCHANGE SOME_FLAGS`;; let _org_extra_word_CONV = !extra_word_CONV;; extra_word_CONV := @@ -606,11 +606,10 @@ let actions = [ ("equal", 135, 457, 171, 493); ];; -let BIGNUM_MUL_8_16_EQUIV = prove( +let BIGNUM_MUL_8_16_CORE_EQUIV = prove( equiv_goal, - REWRITE_TAC[SOME_FLAGS;ALL;NONOVERLAPPING_CLAUSES;BIGNUM_MUL_8_16_EXEC; - BIGNUM_MUL_8_16_NEON_EXEC;BIGNUM_MUL_8_16_CORE_EXEC] THEN + REWRITE_TAC[SOME_FLAGS;ALL;NONOVERLAPPING_CLAUSES; BIGNUM_MUL_8_16_NEON_CORE_EXEC;BIGNUM_MUL_8_16_CORE_EXEC] THEN REPEAT STRIP_TAC THEN (** Initialize **) EQUIV_INITIATE_TAC bignum_mul_8_16_equiv_input_states THEN @@ -618,17 +617,6 @@ let BIGNUM_MUL_8_16_EQUIV = prove( ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN (* necessary to run ldr qs. *) COMBINE_READ_BYTES64_PAIRS_TAC THEN - (* Abbreviate pc to help tactics. *) - ASSERT_NONOVERLAPPING_MODULO_TAC - `nonoverlapping_modulo (2 EXP 64) - (val (z:int64),8 * 16) (pc + 12,LENGTH bignum_mul_8_16_core_mc)` - BIGNUM_MUL_8_16_CORE_EXEC THEN - ASSERT_NONOVERLAPPING_MODULO_TAC - `nonoverlapping_modulo (2 EXP 64) - (val (z:int64),8 * 16) (pc2 + 12,LENGTH bignum_mul_8_16_neon_core_mc)` - BIGNUM_MUL_8_16_NEON_CORE_EXEC THEN - ABBREV_TAC `pc' = pc + 12` THEN - ABBREV_TAC `pc2' = pc2 + 12` THEN (* The main simulation part *) EQUIV_STEPS_TAC actions @@ -638,12 +626,6 @@ let BIGNUM_MUL_8_16_EQUIV = prove( ENSURES_FINAL_STATE'_TAC THEN ENSURES_FINAL_STATE'_TAC THEN ASM_REWRITE_TAC[] THEN REPEAT CONJ_TAC THENL [ - (** SUBGOAL 0. PC **) - EXPAND_TAC "pc'" THEN CONV_TAC WORD_RULE; - - (** SUBGOAL 1. PC2 **) - EXPAND_TAC "pc2'" THEN CONV_TAC WORD_RULE; - (** SUBGOAL 2. Outputs **) ASM_REWRITE_TAC[bignum_mul_8_16_equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,8) state`; @@ -663,7 +645,7 @@ let BIGNUM_MUL_8_16_EQUIV = prove( extra_word_CONV := _org_extra_word_CONV;; (** Now we will prove that bignum_mul_8_16_neon is correct using - BIGNUM_MUL_8_16_EQUIV and BIGNUM_MUL_8_16_CORE_CORRECT. + BIGNUM_MUL_8_16_CORE_EQUIV and BIGNUM_MUL_8_16_CORE_CORRECT. In order to do this, we need an updated version of BIGNUM_MUL_8_16_CORE_CORRECT that has # instructions to step. @@ -677,15 +659,16 @@ extra_word_CONV := _org_extra_word_CONV;; let event_n_at_pc_goal = mk_eventually_n_at_pc_statement `ALL (nonoverlapping (z,8 * 16)) [ - (word pc:int64,LENGTH (bignum_mul_8_16_mc)); + (word pc:int64, + LENGTH (APPEND bignum_mul_8_16_core_mc barrier_inst_bytes)); (x:int64,8 * 8); (y:int64,8 * 8)]` - [`z:int64`;`x:int64`;`y:int64`] 12 bignum_mul_8_16_core_mc 12 + [`z:int64`;`x:int64`;`y:int64`] 0 + bignum_mul_8_16_core_mc 0 `(\s0. C_ARGUMENTS [z;x;y] s0)`;; let BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, - - REWRITE_TAC[LENGTH_APPEND;BIGNUM_MUL_8_16_CORE_EXEC;BIGNUM_MUL_8_16_EXEC; - BARRIER_INST_BYTES_LENGTH] THEN + REWRITE_TAC[LENGTH_APPEND;BIGNUM_MUL_8_16_CORE_EXEC; + BARRIER_INST_BYTES_LENGTH] THEN REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN SUBGOAL_THEN `4 divides (LENGTH bignum_mul_8_16_core_mc)` (fun th -> REWRITE_TAC[MATCH_MP aligned_bytes_loaded_append th; @@ -695,21 +678,6 @@ let BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, REPEAT_N 4 GEN_TAC THEN (* nonoverlapping *) STRIP_TAC THEN - (* Abbreviate pc+12 as pc' because EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC likes pc without offsets *) - ASSERT_NONOVERLAPPING_MODULO_TAC - `nonoverlapping_modulo (2 EXP 64) (val (z:int64), 128) - (pc+12, LENGTH bignum_mul_8_16_mc - 12)` - BIGNUM_MUL_8_16_EXEC THEN - ABBREV_TAC `pc'=pc+12` THEN - SUBGOAL_THEN - `pc+0x0c+LENGTH bignum_mul_8_16_core_mc = pc'+LENGTH bignum_mul_8_16_core_mc` - MP_TAC THENL [ - REWRITE_TAC[BIGNUM_MUL_8_16_CORE_EXEC] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_ADD_CONV) THEN - EXPAND_TAC "pc'" THEN CONV_TAC WORD_ARITH; ALL_TAC] THEN - DISCH_THEN (fun th -> - let th = REWRITE_RULE[BIGNUM_MUL_8_16_CORE_EXEC] th in - REWRITE_TAC[CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV) th]) THEN (* now start..! *) X_GEN_TAC `s0:armstate` THEN GEN_TAC THEN STRIP_TAC THEN @@ -731,21 +699,17 @@ let BIGNUM_MUL_8_16_CORE_CORRECT_N = BIGNUM_MUL_8_16_CORE_CORRECT BIGNUM_MUL_8_16_EVENTUALLY_N_AT_PC;; - -(** Finally, we prove the correctness of core of bignum_mul_8_16_neon - using BIGNUM_MUL_8_16_CORE_CORRECT_N and BIGNUM_MUL_8_16_EQUIV! -**) -let BIGNUM_MUL_8_16_NEON_CORRECT = prove( +let BIGNUM_MUL_8_16_NEON_CORE_CORRECT = prove( `!z x y a b pc2. ALL (nonoverlapping (z,8 * 16)) - [(word pc2,LENGTH bignum_mul_8_16_neon_mc); (x,8 * 8); (y,8 * 8)] + [(word pc2,LENGTH bignum_mul_8_16_neon_core_mc); (x,8 * 8); (y,8 * 8)] ==> ensures arm - (\s. aligned_bytes_loaded s (word pc2) bignum_mul_8_16_neon_mc /\ - read PC s = word(pc2 + 0xc) /\ + (\s. aligned_bytes_loaded s (word pc2) bignum_mul_8_16_neon_core_mc /\ + read PC s = word(pc2) /\ C_ARGUMENTS [z; x; y] s /\ bignum_from_memory (x,8) s = a /\ bignum_from_memory (y,8) s = b) - (\s. read PC s = word (pc2 + 1984) /\ + (\s. read PC s = word (pc2 + LENGTH bignum_mul_8_16_neon_core_mc) /\ bignum_from_memory (z,16) s = a * b) (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; X13; X14; X15; X16; @@ -755,18 +719,20 @@ let BIGNUM_MUL_8_16_NEON_CORRECT = prove( MAYCHANGE SOME_FLAGS)`, let mc_lengths_th = - map (fst o CONJ_PAIR) [BIGNUM_MUL_8_16_EXEC; BIGNUM_MUL_8_16_NEON_EXEC] in + map (fst o CONJ_PAIR) [BIGNUM_MUL_8_16_CORE_EXEC; BIGNUM_MUL_8_16_NEON_CORE_EXEC] in REPEAT GEN_TAC THEN (* Prepare pc for the 'left' program. The left program must not have overwritten x and y. *) SUBGOAL_THEN `?pc. - nonoverlapping (z:int64,8 * 16) (word pc,LENGTH bignum_mul_8_16_mc) /\ - ALL (nonoverlapping (word pc,LENGTH bignum_mul_8_16_mc)) + nonoverlapping (z:int64,8 * 16) (word pc, + LENGTH (APPEND bignum_mul_8_16_core_mc barrier_inst_bytes)) /\ + ALL (nonoverlapping (word pc, + LENGTH (APPEND bignum_mul_8_16_core_mc barrier_inst_bytes))) [(x:int64,8 * 8); (y:int64,8 * 8)] /\ 4 divides val (word pc:int64)` MP_TAC THENL [ - REWRITE_TAC[BIGNUM_MUL_8_16_EXEC;NONOVERLAPPING_CLAUSES;ALL] THEN - CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN + REWRITE_TAC[BIGNUM_MUL_8_16_CORE_EXEC;NONOVERLAPPING_CLAUSES;ALL; + LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH] THEN FIND_HOLE_TAC; ALL_TAC @@ -775,8 +741,8 @@ let BIGNUM_MUL_8_16_NEON_CORRECT = prove( (* massage nonoverlapping assumptions *) REPEAT_N 2 STRIP_TAC THEN - VCGEN_EQUIV_TAC BIGNUM_MUL_8_16_EQUIV BIGNUM_MUL_8_16_CORE_CORRECT_N - [BIGNUM_MUL_8_16_EXEC; BIGNUM_MUL_8_16_NEON_EXEC] THEN + VCGEN_EQUIV_TAC BIGNUM_MUL_8_16_CORE_EQUIV BIGNUM_MUL_8_16_CORE_CORRECT_N + [BIGNUM_MUL_8_16_CORE_EXEC; BIGNUM_MUL_8_16_NEON_CORE_EXEC] THEN (* unravel definitions that may block reasonings *) RULE_ASSUM_TAC (REWRITE_RULE([ALL;NONOVERLAPPING_CLAUSES] @ mc_lengths_th)) THEN @@ -792,20 +758,9 @@ let BIGNUM_MUL_8_16_NEON_CORRECT = prove( ASM_REWRITE_TAC[bignum_mul_8_16_equiv_input_states] THEN EXISTS_TAC `write (memory :> bytelist - (word (pc+12),LENGTH (APPEND bignum_mul_8_16_core_mc barrier_inst_bytes))) + (word pc,LENGTH (APPEND bignum_mul_8_16_core_mc barrier_inst_bytes))) (APPEND bignum_mul_8_16_core_mc barrier_inst_bytes) - (write PC (word (pc+12)) s2)` THEN - SUBGOAL_THEN `aligned_bytes_loaded s2 (word (pc2 + 12):int64) bignum_mul_8_16_neon_core_mc` - (fun th -> REWRITE_TAC[th]) THENL [ - REWRITE_TAC[bignum_mul_8_16_neon_core_mc_def] THEN - IMP_REWRITE_TAC[WORD_ADD;ALIGNED_BYTES_LOADED_SUB_LIST] THEN - CONV_TAC NUM_DIVIDES_CONV THEN NO_TAC; - ALL_TAC - ] THEN - SUBGOAL_THEN `4 divides val (word (pc+12):int64)` ASSUME_TAC THENL [ - IMP_REWRITE_TAC[DIVIDES_4_VAL_WORD_ADD_64] THEN CONV_TAC NUM_DIVIDES_CONV; - ALL_TAC - ] THEN + (write PC (word pc) s2)` THEN PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MUL_8_16_CORE_EXEC THEN (* Now has only one subgoal: the equivalence! *) REWRITE_TAC[C_ARGUMENTS;BIGNUM_FROM_MEMORY_BYTES] THEN @@ -813,12 +768,35 @@ let BIGNUM_MUL_8_16_NEON_CORRECT = prove( PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_MUL_8_16_CORE_EXEC; (** SUBGOAL 2. Postcond **) - MESON_TAC[bignum_mul_8_16_equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; + MESON_TAC[bignum_mul_8_16_equiv_output_states;BIGNUM_FROM_MEMORY_BYTES;BIGNUM_MUL_8_16_NEON_CORE_EXEC]; (** SUBGOAL 3. Frame **) MESON_TAC[] ]);; +let BIGNUM_MUL_8_16_NEON_CORRECT = prove( + `!z x y a b pc2. + ALL (nonoverlapping (z,8 * 16)) + [(word pc2,LENGTH bignum_mul_8_16_neon_mc); (x,8 * 8); (y,8 * 8)] + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc2) bignum_mul_8_16_neon_mc /\ + read PC s = word(pc2 + 0xc) /\ + C_ARGUMENTS [z; x; y] s /\ + bignum_from_memory (x,8) s = a /\ + bignum_from_memory (y,8) s = b) + (\s. read PC s = word (pc2 + 0xc + LENGTH bignum_mul_8_16_neon_core_mc) /\ + bignum_from_memory (z,16) s = a * b) + (MAYCHANGE [PC; X1; X2; X3; X4; X5; X6; X7; X8; + X9; X10; X11; X12; X13; X14; X15; X16; + X17; X19; X20; X21; X22; X23; X24] ,, + MAYCHANGE [Q0; Q1; Q2; Q3; Q4; Q5],, + MAYCHANGE [memory :> bytes(z,8 * 16)] ,, + MAYCHANGE SOME_FLAGS)`, + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_MUL_8_16_NEON_CORE_CORRECT + bignum_mul_8_16_neon_core_mc_def + [BIGNUM_MUL_8_16_NEON_EXEC;BIGNUM_MUL_8_16_NEON_CORE_EXEC]);; + let BIGNUM_MUL_8_16_NEON_SUBROUTINE_CORRECT = prove (`!z x y a b pc stackpointer returnaddress. aligned 16 stackpointer /\ @@ -844,5 +822,6 @@ let BIGNUM_MUL_8_16_NEON_SUBROUTINE_CORRECT = prove MAYCHANGE SOME_FLAGS)`, ARM_ADD_RETURN_STACK_TAC BIGNUM_MUL_8_16_NEON_EXEC - (REWRITE_RULE [BIGNUM_MUL_8_16_NEON_EXEC] BIGNUM_MUL_8_16_NEON_CORRECT) + ((CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV) o REWRITE_RULE + [BIGNUM_MUL_8_16_NEON_EXEC;BIGNUM_MUL_8_16_NEON_CORE_EXEC]) BIGNUM_MUL_8_16_NEON_CORRECT) `[X19;X20;X21;X22;X23;X24]` 48);; diff --git a/arm/proofs/bignum_sqr_8_16.ml b/arm/proofs/bignum_sqr_8_16.ml index 8ed1b2807..d1cc79e42 100644 --- a/arm/proofs/bignum_sqr_8_16.ml +++ b/arm/proofs/bignum_sqr_8_16.ml @@ -376,25 +376,22 @@ let ADK_48_TAC = let BIGNUM_SQR_8_16_CORE_CORRECT = prove (`!z x a pc. - nonoverlapping (word pc,0x49c) (z,8 * 16) + nonoverlapping (word pc,LENGTH bignum_sqr_8_16_core_mc) (z,8 * 16) ==> ensures arm - (\s. aligned_bytes_loaded s (word (pc + 0x8)) bignum_sqr_8_16_core_mc /\ - read PC s = word(pc + 0x8) /\ + (\s. aligned_bytes_loaded s (word pc) bignum_sqr_8_16_core_mc /\ + read PC s = word(pc) /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,8) s = a) - (\s. read PC s = word (pc + 0x490) /\ + (\s. read PC s = word (pc + LENGTH bignum_sqr_8_16_core_mc) /\ bignum_from_memory (z,16) s = a EXP 2) (MAYCHANGE [PC; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; X13; X14; X15; X16; X17; X19; X20; X21; X22] ,, MAYCHANGE [memory :> bytes(z,8 * 16)] ,, MAYCHANGE SOME_FLAGS)`, MAP_EVERY X_GEN_TAC [`z:int64`; `x:int64`; `a:num`; `pc:num`] THEN - ABBREV_TAC `pc' = pc + 0x8` THEN - SUBGOAL_THEN `pc+1168=pc'+1160` SUBST_ALL_TAC THENL [ASM_ARITH_TAC;ALL_TAC] THEN - REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS; NONOVERLAPPING_CLAUSES; + BIGNUM_SQR_8_16_CORE_EXEC] THEN DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN - SUBGOAL_THEN `nonoverlapping_modulo (2 EXP 64) (val (z:int64),8 * 16) (pc',1172)` - ASSUME_TAC THENL [EXPAND_TAC "pc'" THEN NONOVERLAPPING_TAC; ALL_TAC] THEN ENSURES_INIT_TAC "s0" THEN BIGNUM_DIGITIZE_TAC "x_" `bignum_from_memory (x,8) s0` THEN @@ -491,30 +488,21 @@ let BIGNUM_SQR_8_16_CORE_CORRECT = prove let BIGNUM_SQR_8_16_CORRECT = prove( `!z x a pc. - nonoverlapping (word pc,0x49c) (z,8 * 16) + nonoverlapping (word pc,LENGTH bignum_sqr_8_16_mc) (z,8 * 16) ==> ensures arm (\s. aligned_bytes_loaded s (word pc) bignum_sqr_8_16_mc /\ read PC s = word(pc + 0x8) /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,8) s = a) - (\s. read PC s = word (pc + 0x490) /\ + (\s. read PC s = word ((pc + 0x8) + LENGTH bignum_sqr_8_16_core_mc) /\ bignum_from_memory (z,16) s = a EXP 2) (MAYCHANGE [PC; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; X13; X14; X15; X16; X17; X19; X20; X21; X22] ,, MAYCHANGE [memory :> bytes(z,8 * 16)] ,, MAYCHANGE SOME_FLAGS)`, - REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP BIGNUM_SQR_8_16_CORE_CORRECT th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - EXISTS_TAC `x:int64` THEN - ASM_REWRITE_TAC[] THEN - REWRITE_TAC[bignum_sqr_8_16_core_mc_def;BIGNUM_SQR_8_16_EXEC; - WORD_RULE`word (x+y)=word_add (word x) (word y)`] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN - MATCH_MP_TAC ALIGNED_BYTES_LOADED_SUB_LIST THEN - ASM_REWRITE_TAC[] THEN - CONV_TAC NUM_DIVIDES_CONV);; + ARM_SUB_LIST_OF_MC_TAC BIGNUM_SQR_8_16_CORE_CORRECT + bignum_sqr_8_16_core_mc_def + [BIGNUM_SQR_8_16_CORE_EXEC;BIGNUM_SQR_8_16_EXEC]);; let BIGNUM_SQR_8_16_SUBROUTINE_CORRECT = prove (`!z x a pc stackpointer returnaddress. @@ -536,5 +524,8 @@ let BIGNUM_SQR_8_16_SUBROUTINE_CORRECT = prove MAYCHANGE [memory :> bytes(z,8 * 16); memory :> bytes(word_sub stackpointer (word 32),32)])`, ARM_ADD_RETURN_STACK_TAC - BIGNUM_SQR_8_16_EXEC BIGNUM_SQR_8_16_CORRECT + BIGNUM_SQR_8_16_EXEC + ((CONV_RULE (ONCE_DEPTH_CONV NUM_REDUCE_CONV) o + REWRITE_RULE [BIGNUM_SQR_8_16_EXEC;BIGNUM_SQR_8_16_CORE_EXEC; + GSYM ADD_ASSOC]) BIGNUM_SQR_8_16_CORRECT) `[X19;X20;X21;X22]` 32);; diff --git a/arm/proofs/bignum_sqr_8_16_neon.ml b/arm/proofs/bignum_sqr_8_16_neon.ml index 3ac71128c..d226fd989 100644 --- a/arm/proofs/bignum_sqr_8_16_neon.ml +++ b/arm/proofs/bignum_sqr_8_16_neon.ml @@ -402,7 +402,7 @@ let bignum_sqr_8_16_neon_core_mc_def, (** Equivalence relation at the begin and end of the two programs (after stack push/pops stripped **) -(* Equiv. states at (scalar v., neon v.) = (pc + 8,pc + 8) +(* Equiv. states at the 'core' parts of (scalar v., neon v.). x and y are parameterized for nonoverlapping. *) let bignum_sqr_8_16_equiv_input_states = new_definition @@ -414,7 +414,7 @@ let bignum_sqr_8_16_equiv_input_states = new_definition bignum_from_memory (x,8) s1 = a /\ bignum_from_memory (x,8) s1' = a)`;; -(* Equiv. states at (scalar v., neon v.) = (pc + 1168,pc + 1464) *) +(* Equiv. states at the ends of 'core' parts of (scalar v., neon v.) *) let bignum_sqr_8_16_equiv_output_states = new_definition `!s1 s1' x z. (bignum_sqr_8_16_equiv_output_states:(armstate#armstate)->int64->int64->bool) (s1,s1') x z <=> @@ -581,17 +581,17 @@ let actions2 = [ let equiv_goal = mk_equiv_statement `ALL (nonoverlapping (z,8 * 16)) [ - (word pc:int64,LENGTH bignum_sqr_8_16_mc); - (word pc2:int64,LENGTH bignum_sqr_8_16_neon_mc); + (word pc:int64,LENGTH bignum_sqr_8_16_core_mc); + (word pc2:int64,LENGTH bignum_sqr_8_16_neon_core_mc); (x,8 * 8)]` bignum_sqr_8_16_equiv_input_states bignum_sqr_8_16_equiv_output_states - bignum_sqr_8_16_core_mc 8 + bignum_sqr_8_16_core_mc 0 `MAYCHANGE [PC; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; X13; X14; X15; X16; X17; X19; X20; X21; X22] ,, MAYCHANGE [memory :> bytes(z,8 * 16)] ,, MAYCHANGE SOME_FLAGS` - bignum_sqr_8_16_neon_core_mc 8 + bignum_sqr_8_16_neon_core_mc 0 `MAYCHANGE [PC; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; X13; X14; X15; X16; X17; X19; X20; X21; X22] ,, MAYCHANGE [Q0; Q1; Q2; Q3; Q4; Q5; Q6; Q7; Q16; Q17; Q18; Q19; Q20; @@ -604,10 +604,9 @@ extra_word_CONV := [GEN_REWRITE_CONV I [WORD_BITMANIP_SIMP_LEMMAS; WORD_SQR64_HI; WORD_SQR64_LO; WORD_MUL64_LO; WORD_MUL64_HI]] @ (!extra_word_CONV);; -let BIGNUM_SQR_8_16_EQUIV = prove(equiv_goal, +let BIGNUM_SQR_8_16_CORE_EQUIV = prove(equiv_goal, REWRITE_TAC[SOME_FLAGS;ALL;NONOVERLAPPING_CLAUSES; - BIGNUM_SQR_8_16_EXEC;BIGNUM_SQR_8_16_NEON_EXEC; BIGNUM_SQR_8_16_CORE_EXEC;BIGNUM_SQR_8_16_NEON_CORE_EXEC] THEN REPEAT STRIP_TAC THEN (** Initialize **) @@ -616,17 +615,6 @@ let BIGNUM_SQR_8_16_EQUIV = prove(equiv_goal, ASM_PROPAGATE_DIGIT_EQS_FROM_EXPANDED_BIGNUM_TAC THEN (* necessary to run ldr qs *) COMBINE_READ_BYTES64_PAIRS_TAC THEN - (* adding offset due to the size difference between .._mc and .._core_mc *) - ASSERT_NONOVERLAPPING_MODULO_TAC - `nonoverlapping_modulo (2 EXP 64) - (val (z:int64),8 * 16) (pc + 8,LENGTH bignum_sqr_8_16_core_mc)` - BIGNUM_SQR_8_16_CORE_EXEC THEN - ASSERT_NONOVERLAPPING_MODULO_TAC - `nonoverlapping_modulo (2 EXP 64) - (val (z:int64),8 * 16) (pc2 + 8,LENGTH bignum_sqr_8_16_neon_core_mc)` - BIGNUM_SQR_8_16_NEON_CORE_EXEC THEN - ABBREV_TAC `pc' = pc + 8` THEN - ABBREV_TAC `pc2' = pc2 + 8` THEN (* Start *) EQUIV_STEPS_TAC actions BIGNUM_SQR_8_16_CORE_EXEC BIGNUM_SQR_8_16_NEON_CORE_EXEC THEN @@ -644,12 +632,6 @@ let BIGNUM_SQR_8_16_EQUIV = prove(equiv_goal, (* Prove remaining clauses from the postcondition *) ASM_REWRITE_TAC[] THEN REPEAT CONJ_TAC THENL [ - (** SUBGOAL 0. PC **) - EXPAND_TAC "pc'" THEN CONV_TAC WORD_RULE; - - (** SUBGOAL 1. PC2 **) - EXPAND_TAC "pc2'" THEN CONV_TAC WORD_RULE; - (** SUBGOAL 2. Outputs **) ASM_REWRITE_TAC[bignum_sqr_8_16_equiv_output_states;mk_equiv_regs;mk_equiv_bool_regs; BIGNUM_EXPAND_CONV `bignum_from_memory (ptr,8) state`; @@ -670,13 +652,14 @@ extra_word_CONV := _org_extra_word_CONV;; let event_n_at_pc_goal = mk_eventually_n_at_pc_statement - `nonoverlapping (word pc:int64,LENGTH bignum_sqr_8_16_mc) (z:int64,8 * 16)` - [`z:int64`;`x:int64`] 8 bignum_sqr_8_16_core_mc 8 + `nonoverlapping (word pc:int64, + LENGTH (APPEND bignum_sqr_8_16_core_mc barrier_inst_bytes)) (z:int64,8 * 16)` + [`z:int64`;`x:int64`] 0 bignum_sqr_8_16_core_mc 0 `(\s0. C_ARGUMENTS [z;x] s0)`;; let BIGNUM_SQR_8_16_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, - REWRITE_TAC[LENGTH_APPEND;BIGNUM_SQR_8_16_CORE_EXEC;BIGNUM_SQR_8_16_EXEC; + REWRITE_TAC[LENGTH_APPEND;BIGNUM_SQR_8_16_CORE_EXEC; BARRIER_INST_BYTES_LENGTH] THEN REWRITE_TAC[eventually_n_at_pc;ALL;NONOVERLAPPING_CLAUSES;C_ARGUMENTS] THEN SUBGOAL_THEN `4 divides (LENGTH bignum_sqr_8_16_core_mc)` @@ -684,23 +667,9 @@ let BIGNUM_SQR_8_16_EVENTUALLY_N_AT_PC = prove(event_n_at_pc_goal, BIGNUM_SQR_8_16_CORE_EXEC]) THENL [ REWRITE_TAC[BIGNUM_SQR_8_16_CORE_EXEC] THEN CONV_TAC NUM_DIVIDES_CONV; ALL_TAC] THEN - REPEAT_N 3 GEN_TAC THEN + REPEAT GEN_TAC THEN (* nonoverlapping *) STRIP_TAC THEN - (* Abbreviate pc+8 as pc' because EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC likes pc without offsets *) - ASSERT_NONOVERLAPPING_MODULO_TAC - `nonoverlapping_modulo (2 EXP 64) (val (z:int64), 128) (pc+8, LENGTH bignum_sqr_8_16_mc - 8)` - BIGNUM_SQR_8_16_EXEC THEN - ABBREV_TAC `pc'=pc+8` THEN - SUBGOAL_THEN - `pc + 8 + LENGTH bignum_sqr_8_16_core_mc = pc' + LENGTH bignum_sqr_8_16_core_mc` - MP_TAC THENL [ - REWRITE_TAC[BIGNUM_SQR_8_16_CORE_EXEC] THEN - CONV_TAC (ONCE_DEPTH_CONV NUM_SUB_CONV) THEN - EXPAND_TAC "pc'" THEN CONV_TAC WORD_ARITH; ALL_TAC] THEN - DISCH_THEN (fun th -> - let th = REWRITE_RULE[BIGNUM_SQR_8_16_CORE_EXEC] th in - REWRITE_TAC [CONV_RULE (ONCE_DEPTH_CONV NUM_REDUCE_CONV) th]) THEN (* now start..! *) X_GEN_TAC `s0:armstate` THEN GEN_TAC THEN STRIP_TAC THEN (* eventually ==> eventually_n *) @@ -714,17 +683,16 @@ let BIGNUM_SQR_8_16_CORE_CORRECT_N = BIGNUM_SQR_8_16_CORE_CORRECT BIGNUM_SQR_8_16_EVENTUALLY_N_AT_PC;; - -let BIGNUM_SQR_8_16_NEON_CORRECT = prove( +let BIGNUM_SQR_8_16_NEON_CORE_CORRECT = prove( `!z x a pc2. ALL (nonoverlapping (z,8 * 16)) - [(word pc2,1476); (x,8 * 8)] + [(word pc2,LENGTH bignum_sqr_8_16_neon_core_mc); (x,8 * 8)] ==> ensures arm - (\s. aligned_bytes_loaded s (word pc2) bignum_sqr_8_16_neon_mc /\ - read PC s = word(pc2 + 0x8) /\ + (\s. aligned_bytes_loaded s (word pc2) bignum_sqr_8_16_neon_core_mc /\ + read PC s = word(pc2) /\ C_ARGUMENTS [z; x] s /\ bignum_from_memory (x,8) s = a) - (\s. read PC s = word (pc2 + 1464) /\ + (\s. read PC s = word (pc2 + LENGTH bignum_sqr_8_16_neon_core_mc) /\ bignum_from_memory (z,16) s = a EXP 2) (MAYCHANGE [PC; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; X13; X14; X15; X16; X17; X19; X20; X21; X22] ,, @@ -734,19 +702,20 @@ let BIGNUM_SQR_8_16_NEON_CORRECT = prove( MAYCHANGE SOME_FLAGS)`, let mc_lengths_th = - map (fst o CONJ_PAIR) [BIGNUM_SQR_8_16_EXEC; BIGNUM_SQR_8_16_NEON_EXEC] in + map (fst o CONJ_PAIR) [BIGNUM_SQR_8_16_CORE_EXEC; BIGNUM_SQR_8_16_NEON_CORE_EXEC] in REPEAT GEN_TAC THEN (* Prepare pc for the 'left' program that does not overlap with buffers z and x. *) SUBGOAL_THEN `?pc. - nonoverlapping (z:int64,8 * 16) (word pc,LENGTH bignum_sqr_8_16_mc) /\ - nonoverlapping (word pc,LENGTH bignum_sqr_8_16_mc) (x:int64,8 * 8) /\ + nonoverlapping (z:int64,8 * 16) (word pc, + LENGTH (APPEND bignum_sqr_8_16_core_mc barrier_inst_bytes)) /\ + nonoverlapping (word pc, + LENGTH (APPEND bignum_sqr_8_16_core_mc barrier_inst_bytes)) (x:int64,8 * 8) /\ 4 divides val (word pc:int64)` MP_TAC THENL [ (** SUBGOAL 1 **) - REWRITE_TAC[LENGTH_APPEND;BIGNUM_SQR_8_16_EXEC;BARRIER_INST_BYTES_LENGTH; + REWRITE_TAC[LENGTH_APPEND;BIGNUM_SQR_8_16_CORE_EXEC;BARRIER_INST_BYTES_LENGTH; NONOVERLAPPING_CLAUSES;ALL] THEN - CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN FIND_HOLE_TAC; (** SUBGOAL 2 **) @@ -755,8 +724,8 @@ let BIGNUM_SQR_8_16_NEON_CORRECT = prove( REPEAT_N 2 STRIP_TAC THEN - VCGEN_EQUIV_TAC BIGNUM_SQR_8_16_EQUIV BIGNUM_SQR_8_16_CORE_CORRECT_N - [BIGNUM_SQR_8_16_EXEC; BIGNUM_SQR_8_16_NEON_EXEC] THEN + VCGEN_EQUIV_TAC BIGNUM_SQR_8_16_CORE_EQUIV BIGNUM_SQR_8_16_CORE_CORRECT_N + mc_lengths_th THEN (* unfold definitions that may block further tactics *) RULE_ASSUM_TAC (REWRITE_RULE([ALL;NONOVERLAPPING_CLAUSES] @ mc_lengths_th)) THEN @@ -772,20 +741,9 @@ let BIGNUM_SQR_8_16_NEON_CORRECT = prove( ASM_REWRITE_TAC[bignum_sqr_8_16_equiv_input_states] THEN EXISTS_TAC `write (memory :> bytelist - (word (pc+8),LENGTH (APPEND bignum_sqr_8_16_core_mc barrier_inst_bytes))) + (word pc,LENGTH (APPEND bignum_sqr_8_16_core_mc barrier_inst_bytes))) (APPEND bignum_sqr_8_16_core_mc barrier_inst_bytes) - (write PC (word (pc+8)) s2)` THEN - SUBGOAL_THEN `aligned_bytes_loaded s2 (word (pc2 + 8):int64) bignum_sqr_8_16_neon_core_mc` - (fun th -> REWRITE_TAC[th]) THENL [ - REWRITE_TAC[bignum_sqr_8_16_neon_core_mc_def] THEN - IMP_REWRITE_TAC[WORD_ADD;ALIGNED_BYTES_LOADED_SUB_LIST] THEN - CONV_TAC NUM_DIVIDES_CONV; - ALL_TAC - ] THEN - SUBGOAL_THEN `4 divides val (word (pc+8):int64)` ASSUME_TAC THENL [ - IMP_REWRITE_TAC[DIVIDES_4_VAL_WORD_ADD_64] THEN CONV_TAC NUM_DIVIDES_CONV; - ALL_TAC - ] THEN + (write PC (word pc) s2)` THEN (* Expand variables appearing in the equiv relation *) PROVE_CONJ_OF_EQ_READS_TAC BIGNUM_SQR_8_16_CORE_EXEC THEN (* Now has only one subgoal: the equivalence! *) @@ -795,12 +753,35 @@ let BIGNUM_SQR_8_16_NEON_CORRECT = prove( NO_TAC; (** SUBGOAL 2. Postcond **) - MESON_TAC[bignum_sqr_8_16_equiv_output_states;BIGNUM_FROM_MEMORY_BYTES]; + MESON_TAC([bignum_sqr_8_16_equiv_output_states;BIGNUM_FROM_MEMORY_BYTES] @ + mc_lengths_th); (** SUBGOAL 3. Frame **) MESON_TAC[] ]);; +let BIGNUM_SQR_8_16_NEON_CORRECT = prove( + `!z x a pc2. + ALL (nonoverlapping (z,8 * 16)) + [(word pc2,LENGTH bignum_sqr_8_16_neon_mc); (x,8 * 8)] + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc2) bignum_sqr_8_16_neon_mc /\ + read PC s = word(pc2 + 0x8) /\ + C_ARGUMENTS [z; x] s /\ + bignum_from_memory (x,8) s = a) + (\s. read PC s = word (pc2 + (0x8 + LENGTH bignum_sqr_8_16_neon_core_mc)) /\ + bignum_from_memory (z,16) s = a EXP 2) + (MAYCHANGE [PC; X2; X3; X4; X5; X6; X7; X8; X9; X10; X11; X12; + X13; X14; X15; X16; X17; X19; X20; X21; X22] ,, + MAYCHANGE [Q0; Q1; Q2; Q3; Q4; Q5; Q6; Q7; Q16; Q17; Q18; Q19; Q20; + Q21; Q22; Q23; Q30] ,, + MAYCHANGE [memory :> bytes(z,8 * 16)] ,, + MAYCHANGE SOME_FLAGS)`, + + ARM_SUB_LIST_OF_MC_TAC BIGNUM_SQR_8_16_NEON_CORE_CORRECT + bignum_sqr_8_16_neon_core_mc_def + [BIGNUM_SQR_8_16_NEON_EXEC;BIGNUM_SQR_8_16_NEON_CORE_EXEC]);; + let BIGNUM_SQR_8_16_NEON_SUBROUTINE_CORRECT = prove (`!z x a pc stackpointer returnaddress. @@ -823,5 +804,7 @@ let BIGNUM_SQR_8_16_NEON_SUBROUTINE_CORRECT = prove memory :> bytes(word_sub stackpointer (word 32),32)])`, ARM_ADD_RETURN_STACK_TAC BIGNUM_SQR_8_16_NEON_EXEC - (REWRITE_RULE [BIGNUM_SQR_8_16_NEON_EXEC] BIGNUM_SQR_8_16_NEON_CORRECT) + ((CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV) o + REWRITE_RULE [BIGNUM_SQR_8_16_NEON_EXEC;BIGNUM_SQR_8_16_NEON_CORE_EXEC]) + BIGNUM_SQR_8_16_NEON_CORRECT) `[X19;X20;X21;X22]` 32);; diff --git a/arm/proofs/equiv.ml b/arm/proofs/equiv.ml index 817662f40..1f2a879ea 100644 --- a/arm/proofs/equiv.ml +++ b/arm/proofs/equiv.ml @@ -359,6 +359,7 @@ let ASSERT_NONOVERLAPPING_MODULO_TAC t core_exec = (* Assumption stash/recovery tactic. *) let left_prog_state_asms: (string * thm) list list ref = ref [];; +(* Stash `read e s = r` assumptions where s is a member of stnames. *) let STASH_ASMS_OF_READ_STATES (stnames:string list): tactic = fun (asl,g) -> let left_prog, others = List.partition (fun (name,th) -> @@ -855,9 +856,11 @@ let EQUIV_STEPS_TAC actions (execth1:thm) (execth2:thm): tactic = actions;; -(* Given eq = (`read c s = rhs`), abbreviate e as a fresh variable. +(* Given eq = (`read c s = rhs`), abbreviate rhs as a fresh variable. and push this at append_to. append_to is a list of `rhs = fresh_var` equalities. + The abbreviated formula `rhs = x_fresh` is not added as assumption, + unlike ABBREV_TAC. *) let ABBREV_READ_TAC (eq:term) (append_to:thm list ref):tactic = W(fun (asl,g) -> @@ -877,11 +880,13 @@ let ABBREV_READ_TAC (eq:term) (append_to:thm list ref):tactic = ABBREV_TAC (mk_eq (fresh_var,rhs)) THEN (fun (asl,g) -> append_to := (snd (List.hd asl))::!append_to; - ALL_TAC (asl,g)));; + ALL_TAC (List.tl asl,g)));; (* Simulate an instruction of the left program and assign fresh variables to the RHSes of new state equations (`read c s = RHS`). store_to is a list of `RHS = assigned_fresh_var` theorems. + The equations on assigned fresh variables (`RHS = assigned_fresh_var`) + do not appear as assumptions. *) let ARM_STEP'_AND_ABBREV_TAC = @@ -902,17 +907,30 @@ let ARM_STEP'_AND_ABBREV_TAC = (fun th -> ABBREV_READ_TAC (concl th) store_to) update_eqs_prog_list);; +(* store_to is a reference to list of state numbers and abbreviations. + It is initialized as empty when this tactic starts. + Unlike ARM_STEP'_AND_ABBREV_TAC, the equations on assigned fresh variables + (`RHS = assigned_fresh_var`) are added as assumptions. *) let ARM_STEPS'_AND_ABBREV_TAC (execth:thm) (snums:int list) (store_to: (int * thm) list ref):tactic = + W (fun (asl,g) -> store_to := []; ALL_TAC) THEN MAP_EVERY (fun n -> let stname = "s" ^ (string_of_int n) in let store_to_n = ref [] in + (fun (asl,g) -> + let _ = Printf.printf "Stepping to state %s..\n" stname in + ALL_TAC (asl,g)) THEN ARM_STEP'_AND_ABBREV_TAC execth stname store_to_n THEN (fun (asl,g) -> store_to := (map (fun x -> (n,x)) !store_to_n) @ !store_to; + Printf.printf "%d new abbreviations (%d in total)\n" + (List.length !store_to_n) (List.length !store_to); ALL_TAC (asl,g))) - snums;; + snums THEN + W (fun (asl,g) -> + MAP_EVERY (fun (_,x) -> ASSUME_TAC x) !store_to) THEN + CLARIFY_TAC;; let get_read_component (eq:term): term = let lhs = fst (dest_eq eq) in @@ -920,25 +938,38 @@ let get_read_component (eq:term): term = let _ = get_read_component `read X1 s = word 0`;; -(* For the right program *) +(* For the right program. abbrevs must be generated by ARM_STEPS'_AND_ABBREV_TAC. *) let ARM_STEPS'_AND_REWRITE_TAC (execth:thm) (snums:int list) (inst_map: int list) - (abbrevs: (int * thm) list ref):tactic = + (abbrevs: (int * thm) list ref): tactic = + (* Warning: no nested call of ARM_STEPS'_AND_REWRITE_TAC *) + let abbrevs_cpy:(int * thm) list ref = ref [] in + (* Drop the assumptions that are already in abbrevs, for speed! *) + (fun (asl,g) -> + abbrevs_cpy := !abbrevs; + let asl' = List.filter (fun (_,asm) -> + List.for_all (fun (_,abbr) -> abbr <> asm) !abbrevs) + asl in + ALL_TAC (asl', g)) THEN + (* Stash the left program's state equations first *) + (fun (asl,g) -> + let cur_stname = name_of (rand g) in + STASH_ASMS_OF_READ_STATES [cur_stname] (asl,g)) THEN MAP_EVERY (fun n -> let stname = "s'" ^ (string_of_int n) in let new_state_eq = ref (REFL `T`) in - (* Stash the left program's state equations first *) - (fun (asl,g) -> - let cur_stname = name_of (rand g) in - STASH_ASMS_OF_READ_STATES [cur_stname] (asl,g)) THEN + W (fun (asl,g) -> + let _ = Printf.printf "Stepping to state %s.. (has %d remaining abbrevs)\n" + stname (List.length !abbrevs_cpy) in + ALL_TAC) THEN MATCH_MP_TAC EVENTUALLY_N_SWAP THEN ARM_STEP'_TAC (execth::[]) stname (Some new_state_eq) THEN DISCARD_OLDSTATE'_TAC [stname] false THEN MATCH_MP_TAC EVENTUALLY_N_SWAP THEN - RECOVER_ASMS_OF_READ_STATES THEN (fun (asl,g) -> let n_at_lprog = List.nth inst_map (n-1) in - let abbrevs_for_st_n = List.filter (fun (n',t)->n'=n_at_lprog) !abbrevs in + let abbrevs_for_st_n, leftover = List.partition (fun (n',t)->n'=n_at_lprog) !abbrevs_cpy in + let _ = abbrevs_cpy := leftover in let new_state_eqs = CONJUNCTS !new_state_eq in (* filter out read PC *) let new_state_eqs = List.filter @@ -953,8 +984,14 @@ let ARM_STEPS'_AND_REWRITE_TAC (execth:thm) (snums:int list) (inst_map: int list (fun (_,th') -> fst (dest_eq (concl th')) = rhs) abbrevs_for_st_n with | Some (_,rhs_to_abbrev) -> - let th' = SPEC rhs EQ_REFL in - Some (GEN_REWRITE_RULE RAND_CONV [rhs_to_abbrev] th') + (try + let th' = ISPEC rhs EQ_REFL in + Some (GEN_REWRITE_RULE RAND_CONV [rhs_to_abbrev] th') + with _ -> + (Printf.printf "Failed to proceed.\n"; + Printf.printf "- rhs: `%s`\n" (string_of_term rhs); + Printf.printf "- rhs_to_abbrev: `%s`\n" (string_of_thm rhs_to_abbrev); + failwith "ARM_STEPS'_AND_REWRITE_TAC")) | None -> (* This case happens when new_state_eq already has abbreviated RHS *) None) new_state_eqs in @@ -964,8 +1001,11 @@ let ARM_STEPS'_AND_REWRITE_TAC (execth:thm) (snums:int list) (inst_map: int list n (List.length new_state_eqs) (List.length abbrevs_for_st_n); Printf.printf " new state eq:\n"; List.iter (fun t -> Printf.printf " %s\n" (string_of_term (concl t))) new_state_eqs; + Printf.printf " old state eq:\n"; + List.iter (fun (_,t) -> Printf.printf " %s\n" (string_of_term (concl t))) abbrevs_for_st_n; failwith "ARM_STEPS'_AND_REWRITE_TAC"))) - snums;; + snums THEN + RECOVER_ASMS_OF_READ_STATES;; (* An ad-hoc tactic for proving a goal @@ -983,8 +1023,10 @@ let PROVE_CONJ_OF_EQ_READS_TAC (execth:thm) = (* for memory updates *) (ASM_REWRITE_TAC[aligned_bytes_loaded;bytes_loaded] THEN EXPAND_RHS_TAC THEN - REWRITE_TAC[LENGTH_APPEND;execth;BARRIER_INST_BYTES_LENGTH] THEN - READ_OVER_WRITE_ORTHOGONAL_TAC) ORELSE + ((REWRITE_TAC[LENGTH_APPEND;execth;BARRIER_INST_BYTES_LENGTH] THEN + READ_OVER_WRITE_ORTHOGONAL_TAC) ORELSE + (* sometimes the rewrites are not necessary.. *) + READ_OVER_WRITE_ORTHOGONAL_TAC)) ORELSE (ASM_REWRITE_TAC[aligned_bytes_loaded;bytes_loaded] THEN (MATCH_MP_TAC READ_OVER_WRITE_MEMORY_APPEND_BYTELIST ORELSE MATCH_MP_TAC READ_OVER_WRITE_MEMORY_BYTELIST) THEN @@ -1000,9 +1042,35 @@ let PROVE_CONJ_OF_EQ_READS_TAC (execth:thm) = by instantiating pc *) let TRY_CONST_PC_TAC (pc:term):tactic = TRY (EXISTS_TAC pc THEN - REPEAT CONJ_TAC THEN - TRY (MATCH_MP_TAC NONOVERLAPPING_MODULO_SIMPLE THEN ASM_ARITH_TAC) THEN - REWRITE_TAC[VAL_WORD;DIMINDEX_64;DIVIDES_DIV_MULT] THEN ARITH_TAC);; + (* The last clause of conjunctions is '4 divides ...'. *) + REWRITE_TAC[CONJ_ASSOC] THEN + CONJ_TAC THENL [ + ALL_TAC; + REWRITE_TAC[VAL_WORD;DIMINDEX_64;DIVIDES_DIV_MULT] THEN ARITH_TAC + ] THEN + REPEAT CONJ_TAC THEN + MATCH_MP_TAC NONOVERLAPPING_MODULO_SIMPLE THEN ASM_ARITH_TAC);; + +(* ts must be sorted *) +let rec SPLIT_RANGES_TAC (v:term) (ts:int list): tactic = + let rec fn (v:term) (ts:int list) (prevth:thm option):tactic = + begin match ts with + | [] -> ALL_TAC + | t::ts -> + ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num t))) THENL + [ ALL_TAC; + W (fun (asl,g) -> + (match prevth with + | Some prevth -> UNDISCH_THEN (concl prevth) (K ALL_TAC) + | None -> ALL_TAC) THEN + let prevth0:thm ref = ref (EQ_REFL) in + POP_ASSUM (fun th_save -> + let th_save = REWRITE_RULE [ARITH_RULE`!x k. ~(x < k) <=> k <= x`] th_save in + prevth0 := th_save; + ASSUME_TAC th_save) THEN + (fun (asl,g) -> fn v ts (Some !prevth0) (asl,g))) ] + end in + fn v ts None;; (* Prove goals like `?pc. nonoverlapping_modulo (2 EXP 64) (pc,36) (val addr_out,32) /\ @@ -1010,6 +1078,7 @@ let TRY_CONST_PC_TAC (pc:term):tactic = 4 divides val (word pc)` by finding a 'hole' from the memory layout which can place (pc,36). *) let FIND_HOLE_TAC: tactic = + CONV_TAC (ONCE_DEPTH_CONV (NUM_MULT_CONV ORELSEC NUM_ADD_CONV)) THEN fun (asl,g) -> (* Sanity-check the goal *) let pcvar, goal_body = dest_exists g in @@ -1056,10 +1125,8 @@ let FIND_HOLE_TAC: tactic = ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num (2*segsize))))) [`val (addr_in:int64)`;`val (addr_out:int64)`] *) let cases_tac = MAP_EVERY (fun (v:term) -> - List.fold_left then_ ALL_TAC - (List.map - (fun i -> ASM_CASES_TAC (mk_binary "<" (v,mk_numeral (num (i * segsize))))) - (1--(List.length word_ptrs)))) + SPLIT_RANGES_TAC v + (List.map (fun i -> i * segsize) (1--(List.length word_ptrs)))) val_word_ptrs in (* Invoke TRY_CONST_PC_TAC to try each hole. *) @@ -1070,8 +1137,6 @@ let FIND_HOLE_TAC: tactic = ((val_bound_prepare_tac THEN cases_tac THEN - RULE_ASSUM_TAC (REWRITE_RULE [ARITH_RULE`!x k. ~(x < k) <=> k <= x`]) THEN - TRY ASM_ARITH_TAC (* Remove invalid layouts *) THEN try_holes THEN PRINT_GOAL_TAC THEN NO_TAC) ORELSE FAIL_TAC "Has unresolved goals") (asl,g);; @@ -1095,28 +1160,66 @@ let to_ensures_n (ensures_form:term) (numsteps_fn:term): term = `ensures_n arm P Q Fr fn` in list_mk_forall (g_quants,mk_imp(g_asms,g_ensures_n));; +(* prove_correct_barrier_appended replaces `core_mc` with + `APPEND core_mc barrier_inst_bytes` inside assumption and precond. *) let prove_correct_barrier_appended (correct_th:thm) (core_exec_th:thm): thm = (* core_exec_th = `LENGTH core_mc = .. /\ (...)` *) let core_mc = snd (dest_comb (fst (dest_eq (concl (fst (CONJ_PAIR core_exec_th)))))) in let core_mc_with_barrier = mk_binop `APPEND:((8)word)list->((8)word)list->((8)word)list` core_mc `barrier_inst_bytes` in - let goal = subst [core_mc_with_barrier,core_mc] (concl correct_th) in + let update_ensures (t:term):term = + let pred,args = strip_comb t in + if name_of pred <> "ensures" then failwith "prove_correct_barrier_appended" else + let arg_step::arg_pre::arg_post::arg_frame::[] = args in + list_mk_comb (pred, + [arg_step; subst [core_mc_with_barrier,core_mc] arg_pre; arg_post; arg_frame]) + in + let update_imp_ensures (g:term):term = + if is_imp g then + let lhs,rhs = dest_imp g in + mk_imp (subst [core_mc_with_barrier,core_mc] lhs, update_ensures rhs) + else + update_ensures g + in + let goal = + let g = concl correct_th in + let args,g = strip_forall g in + list_mk_forall (args, update_imp_ensures g) + in prove(goal, REPEAT STRIP_TAC THEN - FIRST_ASSUM (fun th -> MP_TAC (MATCH_MP correct_th th)) THEN - REWRITE_TAC[ensures] THEN - DISCH_THEN (fun th -> REPEAT STRIP_TAC THEN MATCH_MP_TAC th) THEN - ASM_REWRITE_TAC[] THEN - let asm = subst [core_mc,`x:((8)word)list`] `4 divides LENGTH (x:((8)word)list)` in - SUBGOAL_THEN asm ASSUME_TAC THENL [ - (** SUBGOAL 1 **) - REWRITE_TAC[core_exec_th] THEN CONV_TAC NUM_DIVIDES_CONV; - (** SUBGOAL 2 **) + MP_TAC (SPEC_ALL correct_th) THEN + (* Prove antedecent of correct_th *) + ANTS_TAC THENL [ + POP_ASSUM MP_TAC THEN + REWRITE_TAC[ALL;NONOVERLAPPING_CLAUSES;LENGTH_APPEND; + BARRIER_INST_BYTES_LENGTH] THEN + STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + (NONOVERLAPPING_TAC ORELSE (PRINT_GOAL_TAC THEN NO_TAC)); ALL_TAC ] THEN - ASM_MESON_TAC[aligned_bytes_loaded_append]);; + + MATCH_MP_TAC ENSURES_SUBLEMMA_THM THEN + REWRITE_TAC[] THEN + REPEAT CONJ_TAC THENL [ + (* hyp. of aligned_bytes_loaded_append*) + (let asm = subst [core_mc,`x:((8)word)list`] `4 divides LENGTH (x:((8)word)list)` in + SUBGOAL_THEN asm ASSUME_TAC THENL [ + REWRITE_TAC[core_exec_th] THEN CONV_TAC NUM_DIVIDES_CONV; + + ALL_TAC + ] THEN + IMP_REWRITE_TAC[aligned_bytes_loaded_append] THEN + REPEAT STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + IMP_REWRITE_TAC(CONJUNCTS ALIGNED_BYTES_LOADED_SUB_LIST) THEN + CONV_TAC NUM_DIVIDES_CONV); + + SUBSUMED_MAYCHANGE_TAC; + + MESON_TAC[] + ]);; let prove_correct_n (execth:thm) (core_execth:thm) (correct_th:thm) (event_n_at_pc_th:thm): thm = @@ -1128,7 +1231,10 @@ let prove_correct_n (execth:thm) (core_execth:thm) (correct_th:thm) TAUT `(P==>(!x. Q x)) <=> (!x. P==>Q x)`; TAUT`(PRECOND==>(P==>Q==>R))<=>(PRECOND/\P/\Q==>R)`] to_eventually_th in - let eventually_form = CONV_RULE (ONCE_DEPTH_CONV NUM_REDUCE_CONV) correct_th in + (* unfold LENGTH mc and LENGTH (APPEND .. )) *) + let eventually_form = + (CONV_RULE (ONCE_DEPTH_CONV NUM_REDUCE_CONV) o + REWRITE_RULE[execth;core_execth;LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH]) correct_th in let eventually_form = REWRITE_RULE[ ensures; TAUT `(P==>(!x. Q x)) <=> (!x. P==>Q x)`; @@ -1143,12 +1249,19 @@ let prove_correct_n (execth:thm) (core_execth:thm) (correct_th:thm) else List.nth args 4 in let numsteps_fn = mk_abs (`s:armstate`,nsteps) in prove(to_ensures_n (concl correct_th) numsteps_fn, - (* Reduce the step function *) - CONV_TAC (ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN + (* Reduce the step function, and LENGTH *. *) + CONV_TAC ( + REWRITE_CONV[execth;core_execth;LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH] THENC + ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN (* use eventually_n_at_pc *) REWRITE_TAC[ensures_n] THEN REPEAT STRIP_TAC THEN REWRITE_TAC[GSYM CONJ_ASSOC] THEN + REWRITE_TAC[execth;core_execth] THEN MATCH_MP_TAC to_eventually_th THEN + (* Reduce the step function, and LENGTH *. *) + CONV_TAC ( + REWRITE_CONV[execth;core_execth;LENGTH_APPEND;BARRIER_INST_BYTES_LENGTH] THENC + ONCE_DEPTH_CONV NUM_REDUCE_CONV) THEN (ASM_MESON_TAC[eventually_form] ORELSE (PRINT_TAC ("ASM_MESON could not prove this goal. eventually_form: `" ^ (string_of_thm eventually_form) ^ "`") THEN @@ -1189,9 +1302,9 @@ let find_pc_varname (asl:(string * thm)list) (stname:string): string = 'write ... s = s2' and apply thm tactic *) let EXPAND_ARM_THEN (h_arm_hyp:string) (exec_decode_th:thm) (ttac:thm->tactic):tactic = REMOVE_THEN h_arm_hyp (fun th -> - (fun (asl,g) -> - let r = ONCE_REWRITE_RULE[ARM_CONV exec_decode_th (map snd asl) (concl th)] in - ttac (r th) (asl,g)));; + (fun (asl,g) -> + let r = ONCE_REWRITE_RULE[ARM_CONV exec_decode_th (map snd asl) (concl th)] in + ttac (r th) (asl,g)));; let EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC (h_arm_hyp:string) (exec_decode_th:thm) (exec_decode_len:thm):tactic = @@ -1236,14 +1349,19 @@ let UPDATE_PC_TAC (pc_var_name:string) (next_st_var_name:string) (next_pc_offset eventually arm (\s'. read PC s' = word (pc + {pc_init_ofs+4n}) /\ P s{k+1} s') s{k+1} ==> steps arm {n-k-1} s{k+1} s' ==> P s{k+1} s'` + + arm_print_log := true prints more info. *) let EVENTUALLY_TAKE_STEP_RIGHT_FORALL_TAC (exec_decode:thm) (init_st_var:term) (pc_init_ofs:int) (k:int) (n:int):tactic = let exec_decode_len,exec_decode_th = CONJ_PAIR exec_decode and - k4::k4p4::n4::nmk4::nmk::nmkmone4::nmkmone::kpcofs4::k4p4pcofs4::npcofs4::[] = + k4::k4p4::n4::nmk4:: + nmk::nmkmone4::nmkmone::kpcofs4:: + k4p4pcofs4::npcofs4::[] = List.map (fun n -> mk_numeral (num n)) - [k*4; (k+1)*4; n*4; (n-k)*4; n-k; (n-k-1)*4; n-k-1; - pc_init_ofs+k*4;pc_init_ofs+(k+1)*4;pc_init_ofs+n*4] in + [k*4; (k+1)*4; n*4; (n-k)*4; + n-k; (n-k-1)*4; n-k-1; pc_init_ofs+k*4; + pc_init_ofs+(k+1)*4; pc_init_ofs+n*4] in let nmk_th = ARITH_RULE (subst [nmk,`nmk:num`;nmkmone,`nmkmone:num`] `nmk = 1 + nmkmone`) in @@ -1259,14 +1377,48 @@ let EVENTUALLY_TAKE_STEP_RIGHT_FORALL_TAC else ALL_TAC (asl,g)) THEN ONCE_REWRITE_TAC[eventually_CASES] THEN REWRITE_TAC[nmk_th] THEN - (* PC mismatch *) - ASM_SIMP_TAC[WORD64_NE_ADD;WORD64_NE_ADD2; + (* Fold PC mismatch to false *) + ASM_REWRITE_TAC[] THEN + W (fun (asl,g) -> + let lhs = fst (dest_imp g) in + let lhs = fst (dest_disj lhs) in + let lhs = fst (dest_conj lhs) in + (* lhs must be `word (pc+kpcofs4) = word(pc+npcofs4)` *) + let l,r = dest_eq lhs in + let l,r = snd (dest_comb l), snd (dest_comb r) in + let l_pc, l_pcofs = + if is_binary "+" l + then dest_binary "+" l + else (l, `0`) in + let r_pc, r_pcofs = dest_binary "+" r in + if l_pc <> r_pc || l_pcofs <> kpcofs4 || r_pcofs <> npcofs4 then + (Printf.printf "Unexpected goal; kpcofs4: %s, npcofs4: %s\n" + (string_of_term kpcofs4) (string_of_term npcofs4); + Printf.printf "\tNot offsets of `%s`\n" (string_of_term lhs); + failwith "EVENTUALLY_TAKE_STEP_RIGHT_FORALL_TAC") + else + (if !arm_print_log then + Printf.printf "Rewriting `%s` to false..\n" + (string_of_term lhs)); + SIMP_TAC[WORD64_NE_ADD;WORD64_NE_ADD2; + ARITH_RULE(mk_lt kpcofs4 npcofs4); + ARITH_RULE(mk_lt npcofs4 `2 EXP 64`)]) THEN + (*ASM_SIMP_TAC[WORD64_NE_ADD;WORD64_NE_ADD2; ARITH_RULE(mk_lt kpcofs4 npcofs4); - ARITH_RULE(mk_lt npcofs4 `2 EXP 64`)] THEN + ARITH_RULE(mk_lt npcofs4 `2 EXP 64`)] THEN*) ONCE_REWRITE_TAC[STEPS_STEP;STEPS_ONE] THEN - DISCH_THEN (fun th -> let _,th2 = CONJ_PAIR th in - LABEL_TAC "HEVENTUALLY" th2) THEN + (fun (asl,g) -> + if !arm_print_log then + (Printf.printf "\n"; + PRINT_GOAL_TAC (asl,g)) + else ALL_TAC (asl,g)) THEN + DISCH_THEN (fun th -> + try let _,th2 = CONJ_PAIR th in + LABEL_TAC "HEVENTUALLY" th2 + with _ -> + (Printf.printf "Not a conjunction: %s\n" (string_of_thm th); + failwith "EVENTUALLY_TAKE_STEP_RIGHT_FORALL_TAC")) THEN DISCH_THEN (LABEL_TAC "HSTEPS") THEN (* If HSTEPS is `?s'. arm s s' /\ steps ...`, choose s'. Otherwise, it is `arm s s_final`; don't touch it *) @@ -1279,6 +1431,11 @@ let EVENTUALLY_TAKE_STEP_RIGHT_FORALL_TAC REMOVE_THEN "HEVENTUALLY" (fun th -> USE_THEN "HARM" (fun th2 -> MP_TAC (MATCH_MP th th2))) THEN + (fun (asl,g) -> + if !arm_print_log then + (Printf.printf "\n"; + PRINT_GOAL_TAC (asl,g)) + else ALL_TAC (asl,g)) THEN (* get explicit definition of the next step *) EXPAND_ARM_AND_UPDATE_BYTES_LOADED_TAC "HARM" exec_decode_th exec_decode_len THEN W (fun (asl,g) -> @@ -1325,7 +1482,10 @@ let EVENTUALLY_STEPS_EXISTS_STEP_TAC (exec_decode:thm) (k:int) (next_pc_ofs:int) (* Prove: eventually arm (\s'. read PC s' = word (pc' + 1160) /\ P s0 s') s0 - ==> eventually_n arm n (\s'. read PC s' = word (pc' + 1160) /\ P s0 s') s0 *) + ==> eventually_n arm n (\s'. read PC s' = word (pc' + 1160) /\ P s0 s') s0 + PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC works well when in assumption + 'read PC s0 = word pc' pc is not something like 'pc + ..'. +*) let PROVE_EVENTUALLY_IMPLIES_EVENTUALLY_N_TAC execth = let mc_length_th = fst (CONJ_PAIR execth) in let n = Num.int_of_num (dest_numeral (snd (dest_eq (concl mc_length_th)))) / 4 in @@ -1421,6 +1581,10 @@ let mk_equiv_statement (assum:term) (equiv_in:thm) (equiv_out:thm) [`:armstate`;`:armstate`;`:armstate`;`:armstate`] in let quants = union quants_in quants_out in let quants = [`pc:num`;`pc2:num`] @ quants in + (* There might be more free variables in 'assum'. Let's add them too. *) + let quants = quants @ + (List.filter (fun t -> not (mem t quants)) (frees assum)) in + (* Now build 'ensures2' *) let mk_aligned_bytes_loaded (s:term) (pc_var:term) (pc_ofs:int) (mc:term) = let _ = List.map2 type_check [s;pc_var;mc] [`:armstate`;`:num`;`:((8)word)list`] in @@ -1490,8 +1654,10 @@ let mk_equiv_statement (assum:term) (equiv_in:thm) (equiv_out:thm) verification condition from two lemmas: 1. equiv_th: a program equivalence theorem between p and another program p2 2. correct_n_th: a specification of p2 in `ensures_n`. - execths is a list of *_EXEC theorems for p1 and p2. - The result of tactic is conjunction of three clauses. *) + execths is a list of *_EXEC theorems for p1 and p2 used in equiv_th's + hypotheses, specifically the nonoverlapping predicates. + The result of tactic is conjunction of three clauses. + If arm_print_log is set to true, it prints more info. *) let VCGEN_EQUIV_TAC equiv_th correct_n_th (execths:thm list) = let stepfn = let b = snd (strip_forall (concl equiv_th)) in @@ -1510,9 +1676,13 @@ let VCGEN_EQUIV_TAC equiv_th correct_n_th (execths:thm list) = (* Prove the nonoverlapping assumptions here: (ASSUM ==> ensures_n) ==> ensures_n *) + W (fun (asl,g) -> + if !arm_print_log then PRINT_GOAL_TAC + else ALL_TAC) THEN W (fun (asl,g) -> if is_imp g then - let r = ([ALL;NONOVERLAPPING_CLAUSES] @ execths) in + let r = ([ALL;NONOVERLAPPING_CLAUSES;LENGTH_APPEND; + BARRIER_INST_BYTES_LENGTH] @ execths) in SUBGOAL_THEN (fst (dest_imp (fst (dest_imp g)))) (fun th -> REWRITE_TAC[th]) THENL [ REWRITE_TAC r THEN RULE_ASSUM_TAC(REWRITE_RULE r) THEN diff --git a/arm/proofs/instruction.ml b/arm/proofs/instruction.ml index 73cf4b3ef..e127906b6 100644 --- a/arm/proofs/instruction.ml +++ b/arm/proofs/instruction.ml @@ -143,6 +143,20 @@ let ALIGNED_BYTES_LOADED_SUB_LIST = prove ONCE_REWRITE_TAC[GSYM MOD_ADD_MOD] THEN CONV_TAC NUM_REDUCE_CONV THEN SIMP_TAC[] THEN CONV_TAC NUM_REDUCE_CONV);; +let ALIGNED_BYTES_LOADED_SUB_LIST = prove + (`(!s pc l m n. + aligned_bytes_loaded s pc l /\ 4 divides m + ==> aligned_bytes_loaded s (word_add pc (word m)) (SUB_LIST(m,n) l)) /\ + (!s pc l n. + aligned_bytes_loaded s pc l + ==> aligned_bytes_loaded s pc (SUB_LIST(0,n) l))`, + REWRITE_TAC[ALIGNED_BYTES_LOADED_SUB_LIST] THEN + REPEAT STRIP_TAC THEN + GEN_REWRITE_TAC(RATOR_CONV o RAND_CONV) + [GSYM (fst (CONJ_PAIR WORD_ADD_0))] THEN + IMP_REWRITE_TAC[WORD_ADD;ALIGNED_BYTES_LOADED_SUB_LIST] THEN + CONV_TAC NUM_DIVIDES_CONV);; + let ALIGNED_BYTES_LOADED_TRIM_LIST = prove (`!s pc l m n. aligned_bytes_loaded s pc l /\ 4 divides m diff --git a/arm/proofs/neon_helper.ml b/arm/proofs/neon_helper.ml index 9eb0b4a20..ed4b45934 100644 --- a/arm/proofs/neon_helper.ml +++ b/arm/proofs/neon_helper.ml @@ -9,6 +9,62 @@ needs "common/misc.ml";; +(* Simplifies word_subword + word_join + word_zx + bitwise ops. + Also tries to simplify word_shr. *) +let WORD_BITMANIP_SIMP_LEMMAS = prove( + `!(x32:(32)word) (y32:(32)word) (x32_2:(32)word) + (x64:(64)word) (y64:(64)word) (x64_2:(64)word) (y64_2:(64)word) + (y128:(128)word). + // word_subword + word_subword (word_subword y128 (0,64):(64)word) (0,32):(32)word = + word_subword y128 (0,32):(32)word /\ + word_subword (word_subword y128 (64,64):(64)word) (0,32):(32)word = + word_subword y128 (64,32):(32)word /\ + word_subword (word_subword y128 (0,64):(64)word) (32,32):(32)word = + word_subword y128 (32,32):(32)word /\ + word_subword (word_subword y128 (64,64):(64)word) (32,32):(32)word = + word_subword y128 (96,32):(32)word /\ + word_subword + (word 79228162495817593524129366015:(128)word) (64,64):(64)word = + word 4294967295 /\ + word_subword + (word 79228162495817593524129366015:(128)word) (0,64):(64)word = + word 4294967295 /\ + // word_subword + word_join + word_subword (word_join x32 y32: (64)word) (0,32) = y32 /\ + word_subword (word_join x32 y32: (64)word) (32,32) = x32 /\ + word_subword (word_join x64 y64: (128)word) (0,64) = y64 /\ + word_subword (word_join x64 y64: (128)word) (64,64) = x64 /\ + word_subword (word_join x64 y64: (128)word) (0,32):(32)word = + word_subword y64 (0,32):(32)word /\ + word_subword (word_join x64 y64: (128)word) (32,32):(32)word = + word_subword y64 (32,32):(32)word /\ + word_subword (word_join x64 y64: (128)word) (64,32):(32)word = + word_subword x64 (0,32):(32)word /\ + word_subword (word_join x64 y64: (128)word) (96,32):(32)word = + word_subword x64 (32,32):(32)word /\ + word_subword + (word_join + (word_join x64_2 x64: (128)word) + (word_join y64_2 y64: (128)word): (256)word) + (64,128):(128)word = word_join x64 y64_2 /\ + // word_subword + word_zx + word_subword (word_zx x64:(128)word) (0,32):(32)word = word_subword x64 (0,32) /\ + word_subword (word_zx x32:(64)word) (0,32):(32)word = x32 /\ + word_subword (word_subword x64 (0,128):(128)word) (0,32):(32)word = word_subword x64 (0,32) /\ + word_subword (word_zx x64:(128)word) (32,32):(32)word = word_subword x64 (32,32) /\ + word_subword (word_subword x64 (0,128):(128)word) (32,32):(32)word = word_subword x64 (32,32) /\ + // word_subword + word_and + word_join + word_subword (word_and y128 (word_join x64_2 x64:(128)word)) (64,64) = + word_and (word_subword y128 (64,64):(64)word) x64_2 /\ + word_subword (word_and y128 (word_join x64_2 x64:(128)word)) (0,64) = + word_and (word_subword y128 (0,64):(64)word) x64 /\ + // consuming word_ushrs. + word_subword (word_ushr x64 32) (0,32):(32)word = word_subword x64 (32,32) /\ + word_ushr (word_join x32_2 x32:(64)word) 32 = word_zx x32_2`, + CONV_TAC WORD_BLAST);; + + let SPLIT_WORD64_TO_HILO: tactic = SUBST1_TAC (WORD_BLAST `(x:(64)word) = word_join (word_subword x (32,32):(32)word) (word_subword x (0,32):(32)word)`) THEN @@ -17,17 +73,23 @@ let SPLIT_WORD64_TO_HILO: tactic = ASSUME_TAC (REWRITE_RULE [DIMINDEX_32] (ISPECL [`xh:(32)word`] VAL_BOUND)) THEN ASSUME_TAC (REWRITE_RULE [DIMINDEX_32] (ISPECL [`xl:(32)word`] VAL_BOUND));; +(* Low 64-bits of 64x64->128-bit squaring *) let WORD_SQR64_LO = prove(`! (x:(64)word). word_or (word_shl (word_add - (word_and (word 4294967295) - (word_add - (word_mul (word_ushr x 32) (word_zx (word_subword x (0,32):(32)word))) - (word_ushr - (word_mul (word_zx (word_subword x (0,32):(32)word)) - (word_zx (word_subword x (0,32):(32)word))) - 32))) - (word_mul (word_ushr x 32) (word_zx (word_subword x (0,32):(32)word)))) + (word_and + (word 4294967295) + (word_add + (word_mul + (word_zx (word_subword x (32,32):(32)word)) + (word_zx (word_subword x (0,32):(32)word))) + (word_ushr + (word_mul (word_zx (word_subword x (0,32):(32)word)) + (word_zx (word_subword x (0,32):(32)word))) + 32))) + (word_mul + (word_zx (word_subword x (32,32):(32)word):(64)word) + (word_zx (word_subword x (0,32):(32)word)))) 32) (word_and (word_mul (word_zx (word_subword x (0,32):(32)word)) @@ -35,7 +97,9 @@ let WORD_SQR64_LO = prove(`! (x:(64)word). word_or (word 4294967295)) = word (0 + val x * val x)`, REWRITE_TAC [WORD_RULE `word (0 + val (a:(64)word) * val (b:(64)word)) = - word_mul (a:(64)word) (b:(64)word)`] THEN + word_mul (a:(64)word) (b:(64)word)`; + WORD_BLAST `word_zx (word_subword x (32,32):(32)word):(64)word = + word_ushr x 32`] THEN REPEAT GEN_TAC THEN SPLIT_WORD64_TO_HILO THEN REWRITE_TAC[WORD_BITMANIP_SIMP_LEMMAS] THEN @@ -43,7 +107,7 @@ let WORD_SQR64_LO = prove(`! (x:(64)word). word_or let r = REWRITE_TAC [VAL_WORD_ADD; VAL_WORD_MUL; VAL_WORD_ZX_GEN; VAL_WORD_SUBWORD; VAL_WORD; VAL_WORD_SHL; WORD_OF_BITS_32BITMASK; VAL_WORD_AND_MASK; VAL_WORD_USHR; VAL_WORD_JOIN; WORD_OR_ADD_DISJ] in - (r THEN ONCE_REWRITE_TAC [WORD_RULE `word_and x y = word_and y x`] THEN r) + (r THEN ONCE_REWRITE_TAC [WORD_AND_SYM] THEN r) THEN REWRITE_TAC[DIMINDEX_64; DIMINDEX_32; ARITH_RULE `MIN 32 32 = 32 /\ MIN 32 64 = 32 /\ MIN 64 32 = 32`; @@ -82,22 +146,31 @@ let WORD_SQR64_LO = prove(`! (x:(64)word). word_or ARITH_TAC);; let WORD_SQR64_HI = prove(`!(x:(64)word). word_add - (word_add (word_mul (word_ushr x 32) (word_ushr x 32)) - (word_ushr - (word_add - (word_and - (word 4294967295) - (word_add - (word_mul (word_ushr x 32) (word_zx (word_subword x (0,32):(32)word))) - (word_ushr - (word_mul (word_zx (word_subword x (0,32):(32)word)) - (word_zx (word_subword x (0,32):(32)word))) - 32))) - (word_mul (word_ushr x 32) (word_zx (word_subword x (0,32):(32)word)))) - 32)) + (word_add + (word_mul + (word_zx (word_subword x (32,32):(32)word):(64)word) + (word_zx (word_subword x (32,32):(32)word):(64)word)) + (word_ushr + (word_add + (word_and + (word 4294967295) + (word_add + (word_mul + (word_zx (word_subword x (32,32):(32)word):(64)word) + (word_zx (word_subword x (0,32):(32)word))) + (word_ushr + (word_mul (word_zx (word_subword x (0,32):(32)word)) + (word_zx (word_subword x (0,32):(32)word))) + 32))) + (word_mul + (word_zx (word_subword x (32,32):(32)word):(64)word) + (word_zx (word_subword x (0,32):(32)word)))) + 32)) (word_ushr (word_add - (word_mul (word_ushr x 32) (word_zx (word_subword x (0,32):(32)word))) + (word_mul + (word_zx (word_subword x (32,32):(32)word):(64)word) + (word_zx (word_subword x (0,32):(32)word))) (word_ushr (word_mul (word_zx (word_subword x (0,32):(32)word)) @@ -303,6 +376,33 @@ let WORD_MUL64_HI = prove(`!(x: (64)word) (y: (64)word). AP_THM_TAC THEN AP_TERM_TAC THEN ARITH_TAC);; +(* Low 64-bits of 64x64->128-bit squaring (version 2) *) +let WORD_SQR64_LO2 = prove( + `!(x:(64)word). + word_add + (word_mul + (word_zx (word_subword x (0,32):(32)word):(64)word) + (word_zx (word_subword x (0,32):(32)word):(64)word)) + (word_shl + (word_mul (word_zx (word_subword x (0,32):(32)word):(64)word) + (word_zx (word_subword x (32,32):(32)word):(64)word)) + 33) = + word (0 + val x * val x)`, + + REWRITE_TAC[GSYM WORD_MUL64_LO] THEN + GEN_TAC THEN AP_TERM_TAC THEN + REWRITE_TAC[ARITH_RULE`33=1+32`;GSYM WORD_SHL_COMPOSE] THEN + REWRITE_TAC[WORD_RULE `word_shl x 1 = word_add x x`] THEN + REWRITE_TAC[WORD_BLAST `!(x:(64)word) y. + word_shl x 32 = word_shl y 32 + <=> word_subword x (0,32):(32)word = word_subword y (0,32)`] THEN + IMP_REWRITE_TAC[WORD_SUBWORD_ADD] THEN + REWRITE_TAC(ADD_0 :: map ARITH_RULE [`x EXP 0 = 1`;`x MOD 1=0`;`0<1`]) THEN + REWRITE_TAC[DIMINDEX_32;DIMINDEX_64;ARITH_RULE`0+32<=64`] THEN + IMP_REWRITE_TAC[WORD_BITMANIP_SIMP_LEMMAS;WORD_SUBWORD_MUL] THEN + REWRITE_TAC[DIMINDEX_32;DIMINDEX_64;ARITH_RULE`32<=64`] THEN + AP_THM_TAC THEN AP_TERM_TAC THEN GEN_REWRITE_TAC LAND_CONV [WORD_MUL_SYM] + THEN REFL_TAC);; (* ------------------------------------------------------------------------- *) (* Helpful tactics *) diff --git a/arm/proofs/specifications.txt b/arm/proofs/specifications.txt index 4530141f7..2455d4b07 100644 --- a/arm/proofs/specifications.txt +++ b/arm/proofs/specifications.txt @@ -10,6 +10,7 @@ BIGNUM_AMONTMUL_P256_ALT_SUBROUTINE_CORRECT BIGNUM_AMONTMUL_P256_NEON_SUBROUTINE_CORRECT BIGNUM_AMONTMUL_P256_SUBROUTINE_CORRECT BIGNUM_AMONTMUL_P384_ALT_SUBROUTINE_CORRECT +BIGNUM_AMONTMUL_P384_NEON_SUBROUTINE_CORRECT BIGNUM_AMONTMUL_P384_SUBROUTINE_CORRECT BIGNUM_AMONTMUL_SM2_ALT_SUBROUTINE_CORRECT BIGNUM_AMONTMUL_SM2_SUBROUTINE_CORRECT @@ -19,6 +20,7 @@ BIGNUM_AMONTSQR_P256_ALT_SUBROUTINE_CORRECT BIGNUM_AMONTSQR_P256_NEON_SUBROUTINE_CORRECT BIGNUM_AMONTSQR_P256_SUBROUTINE_CORRECT BIGNUM_AMONTSQR_P384_ALT_SUBROUTINE_CORRECT +BIGNUM_AMONTSQR_P384_NEON_SUBROUTINE_CORRECT BIGNUM_AMONTSQR_P384_SUBROUTINE_CORRECT BIGNUM_AMONTSQR_SM2_ALT_SUBROUTINE_CORRECT BIGNUM_AMONTSQR_SM2_SUBROUTINE_CORRECT @@ -139,6 +141,7 @@ BIGNUM_MONTMUL_P256_ALT_SUBROUTINE_CORRECT BIGNUM_MONTMUL_P256_NEON_SUBROUTINE_CORRECT BIGNUM_MONTMUL_P256_SUBROUTINE_CORRECT BIGNUM_MONTMUL_P384_ALT_SUBROUTINE_CORRECT +BIGNUM_MONTMUL_P384_NEON_SUBROUTINE_CORRECT BIGNUM_MONTMUL_P384_SUBROUTINE_CORRECT BIGNUM_MONTMUL_P521_ALT_SUBROUTINE_CORRECT BIGNUM_MONTMUL_P521_SUBROUTINE_CORRECT @@ -152,6 +155,7 @@ BIGNUM_MONTSQR_P256_ALT_SUBROUTINE_CORRECT BIGNUM_MONTSQR_P256_NEON_SUBROUTINE_CORRECT BIGNUM_MONTSQR_P256_SUBROUTINE_CORRECT BIGNUM_MONTSQR_P384_ALT_SUBROUTINE_CORRECT +BIGNUM_MONTSQR_P384_NEON_SUBROUTINE_CORRECT BIGNUM_MONTSQR_P384_SUBROUTINE_CORRECT BIGNUM_MONTSQR_P521_ALT_SUBROUTINE_CORRECT BIGNUM_MONTSQR_P521_SUBROUTINE_CORRECT diff --git a/benchmarks/benchmark.c b/benchmarks/benchmark.c index 0da87252f..bf20b84d8 100644 --- a/benchmarks/benchmark.c +++ b/benchmarks/benchmark.c @@ -795,7 +795,9 @@ void call_bignum_ksqr_16_32_neon(void) {} void call_bignum_kmul_32_64_neon(void) {} void call_bignum_ksqr_32_64_neon(void) {} void call_bignum_montmul_p256_neon(void) {} +void call_bignum_montmul_p384_neon(void) {} void call_bignum_montsqr_p256_neon(void) {} +void call_bignum_montsqr_p384_neon(void) {} void call_bignum_mul_8_16_neon(void) {} void call_bignum_sqr_8_16_neon(void) {} @@ -816,7 +818,9 @@ void call_bignum_ksqr_16_32_neon(void) repeat(bignum_ksqr_16_32_neon(b0,b1,b2)) void call_bignum_kmul_32_64_neon(void) repeat(bignum_kmul_32_64_neon(b0,b1,b2,b3)) void call_bignum_ksqr_32_64_neon(void) repeat(bignum_ksqr_32_64_neon(b0,b1,b2)) void call_bignum_montmul_p256_neon(void) repeat(bignum_montmul_p256_neon(b0,b1,b2)) +void call_bignum_montmul_p384_neon(void) repeat(bignum_montmul_p384_neon(b0,b1,b2)) void call_bignum_montsqr_p256_neon(void) repeat(bignum_montsqr_p256_neon(b0,b1)) +void call_bignum_montsqr_p384_neon(void) repeat(bignum_montsqr_p384_neon(b0,b1)) void call_bignum_mul_8_16_neon(void) repeat(bignum_mul_8_16_neon(b0,b1,b2)) void call_bignum_sqr_8_16_neon(void) repeat(bignum_sqr_8_16_neon(b0,b1)) @@ -1055,6 +1059,7 @@ int main(int argc, char *argv[]) timingtest(all,"bignum_montmul_p256k1_alt",call_bignum_montmul_p256k1_alt); timingtest(bmi,"bignum_montmul_p384",call_bignum_montmul_p384); timingtest(all,"bignum_montmul_p384_alt",call_bignum_montmul_p384_alt); + timingtest(arm,"bignum_montmul_p384_neon", call_bignum_montmul_p384_neon); timingtest(bmi,"bignum_montmul_p521",call_bignum_montmul_p521); timingtest(all,"bignum_montmul_p521_alt",call_bignum_montmul_p521_alt); timingtest(bmi,"bignum_montmul_sm2",call_bignum_montmul_sm2); @@ -1068,6 +1073,7 @@ int main(int argc, char *argv[]) timingtest(all,"bignum_montsqr_p256k1_alt",call_bignum_montsqr_p256k1_alt); timingtest(bmi,"bignum_montsqr_p384",call_bignum_montsqr_p384); timingtest(all,"bignum_montsqr_p384_alt",call_bignum_montsqr_p384_alt); + timingtest(arm,"bignum_montsqr_p384_neon", call_bignum_montsqr_p384_neon); timingtest(bmi,"bignum_montsqr_p521",call_bignum_montsqr_p521); timingtest(all,"bignum_montsqr_p521_alt",call_bignum_montsqr_p521_alt); timingtest(bmi,"bignum_montsqr_sm2",call_bignum_montsqr_sm2); diff --git a/common/misc.ml b/common/misc.ml index 88b50f5ad..ad9b84e4a 100644 --- a/common/misc.ml +++ b/common/misc.ml @@ -1271,58 +1271,6 @@ let ASSERT_USING_ASM_ARITH_TAC t = (* A few more lemmas about words. *) (* ------------------------------------------------------------------------- *) -let WORD_BITMANIP_SIMP_LEMMAS = prove( - `!(x32:(32)word) (y32:(32)word) (x32_2:(32)word) - (x64:(64)word) (y64:(64)word) (x64_2:(64)word) (y64_2:(64)word) - (y128:(128)word). - // word_subword - word_subword (word_subword y128 (0,64):(64)word) (0,32):(32)word = - word_subword y128 (0,32):(32)word /\ - word_subword (word_subword y128 (64,64):(64)word) (0,32):(32)word = - word_subword y128 (64,32):(32)word /\ - word_subword (word_subword y128 (0,64):(64)word) (32,32):(32)word = - word_subword y128 (32,32):(32)word /\ - word_subword (word_subword y128 (64,64):(64)word) (32,32):(32)word = - word_subword y128 (96,32):(32)word /\ - word_subword - (word 79228162495817593524129366015:(128)word) (64,64):(64)word = - word 4294967295 /\ - word_subword - (word 79228162495817593524129366015:(128)word) (0,64):(64)word = - word 4294967295 /\ - // .. + word_join - word_subword (word_join x32 y32: (64)word) (0,32) = y32 /\ - word_subword (word_join x32 y32: (64)word) (32,32) = x32 /\ - word_subword (word_join x64 y64: (128)word) (0,64) = y64 /\ - word_subword (word_join x64 y64: (128)word) (64,64) = x64 /\ - word_subword (word_join x64 y64: (128)word) (0,32):(32)word = - word_subword y64 (0,32):(32)word /\ - word_subword (word_join x64 y64: (128)word) (32,32):(32)word = - word_subword y64 (32,32):(32)word /\ - word_subword (word_join x64 y64: (128)word) (64,32):(32)word = - word_subword x64 (0,32):(32)word /\ - word_subword (word_join x64 y64: (128)word) (96,32):(32)word = - word_subword x64 (32,32):(32)word /\ - word_subword - (word_join - (word_join x64_2 x64: (128)word) - (word_join y64_2 y64: (128)word): (256)word) - (64,128):(128)word = word_join x64 y64_2 /\ - // .. + word_zx - word_subword (word_zx x64:(128)word) (0,32):(32)word = word_subword x64 (0,32) /\ - word_subword (word_subword x64 (0,128):(128)word) (0,32):(32)word = word_subword x64 (0,32) /\ - word_subword (word_zx x64:(128)word) (32,32):(32)word = word_subword x64 (32,32) /\ - word_subword (word_subword x64 (0,128):(128)word) (32,32):(32)word = word_subword x64 (32,32) /\ - // .. + word_and - word_subword (word_and y128 (word_join x64_2 x64:(128)word)) (64,64) = - word_and (word_subword y128 (64,64):(64)word) x64_2 /\ - word_subword (word_and y128 (word_join x64_2 x64:(128)word)) (0,64) = - word_and (word_subword y128 (0,64):(64)word) x64 /\ - // .. + word_ushr - word_zx (word_subword (word_ushr x64 32) (0,32):(32)word):(64)word = word_ushr x64 32 /\ - word_ushr (word_join x32_2 x32:(64)word) 32 = word_zx x32_2`, - CONV_TAC WORD_BLAST);; - let WORD_ADD_ASSOC_CONSTS = prove( `!(x:(N)word) n m. (word_add (word_add x (word n)) (word m)) = (word_add x (word (n+m)))`, @@ -1366,6 +1314,19 @@ let WORD64_NE_ADD2 = prove( ASM_SIMP_TAC[CONG_CASE] THEN STRIP_TAC THEN ASM_ARITH_TAC);; +let WORD_SUBWORD_MUL = prove + (`!x y:N word. + dimindex(:M) = len /\ len <= dimindex(:N) + ==> word_subword (word_mul x y) (0,len):M word = + word_mul (word_subword x (0,len)) + (word_subword y (0,len))`, + REPEAT STRIP_TAC THEN + REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_MUL; VAL_WORD_SUBWORD] THEN + ASM_REWRITE_TAC(MOD_MOD_EXP_MIN:: + map ARITH_RULE [`2 EXP 0=1`;`x DIV 1=x`;`MIN x x=x`]) THEN + CONV_TAC MOD_DOWN_CONV THEN + AP_TERM_TAC THEN ASM_SIMP_TAC[ARITH_RULE `m <= n ==> MIN n m = m`]);; + (* ------------------------------------------------------------------------- *) (* A few more lemmas about natural numbers. *) (* ------------------------------------------------------------------------- *) diff --git a/include/s2n-bignum-c89.h b/include/s2n-bignum-c89.h index 3d9644fcc..b0b32e787 100644 --- a/include/s2n-bignum-c89.h +++ b/include/s2n-bignum-c89.h @@ -545,6 +545,7 @@ extern void bignum_montmul_p256k1_alt (uint64_t z[4], uint64_t x[4], uint64_t y[ /* Inputs x[6], y[6]; output z[6] */ extern void bignum_montmul_p384 (uint64_t z[6], uint64_t x[6], uint64_t y[6]); extern void bignum_montmul_p384_alt (uint64_t z[6], uint64_t x[6], uint64_t y[6]); +extern void bignum_montmul_p384_neon (uint64_t z[6], uint64_t x[6], uint64_t y[6]); /* Montgomery multiply, z := (x * y / 2^576) mod p_521 */ /* Inputs x[9], y[9]; output z[9] */ @@ -579,6 +580,7 @@ extern void bignum_montsqr_p256k1_alt (uint64_t z[4], uint64_t x[4]); /* Input x[6]; output z[6] */ extern void bignum_montsqr_p384 (uint64_t z[6], uint64_t x[6]); extern void bignum_montsqr_p384_alt (uint64_t z[6], uint64_t x[6]); +extern void bignum_montsqr_p384_neon (uint64_t z[6], uint64_t x[6]); /* Montgomery square, z := (x^2 / 2^576) mod p_521 */ /* Input x[9]; output z[9] */ diff --git a/include/s2n-bignum.h b/include/s2n-bignum.h index 31e352235..50a7bb53d 100644 --- a/include/s2n-bignum.h +++ b/include/s2n-bignum.h @@ -552,6 +552,9 @@ extern void bignum_montmul_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t // Inputs x[6], y[6]; output z[6] extern void bignum_montmul_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t x[S2N_BIGNUM_STATIC 6], uint64_t y[S2N_BIGNUM_STATIC 6]); extern void bignum_montmul_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t x[S2N_BIGNUM_STATIC 6], uint64_t y[S2N_BIGNUM_STATIC 6]); +extern void bignum_montmul_p384_neon(uint64_t z[S2N_BIGNUM_STATIC 6], + uint64_t x[S2N_BIGNUM_STATIC 6], + uint64_t y[S2N_BIGNUM_STATIC 6]); // Montgomery multiply, z := (x * y / 2^576) mod p_521 // Inputs x[9], y[9]; output z[9] @@ -586,6 +589,8 @@ extern void bignum_montsqr_p256k1_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t // Input x[6]; output z[6] extern void bignum_montsqr_p384 (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t x[S2N_BIGNUM_STATIC 6]); extern void bignum_montsqr_p384_alt (uint64_t z[S2N_BIGNUM_STATIC 6], uint64_t x[S2N_BIGNUM_STATIC 6]); +extern void bignum_montsqr_p384_neon(uint64_t z[S2N_BIGNUM_STATIC 6], + uint64_t x[S2N_BIGNUM_STATIC 6]); // Montgomery square, z := (x^2 / 2^576) mod p_521 // Input x[9]; output z[9] diff --git a/tests/test.c b/tests/test.c index fc78983b3..c63b019cc 100644 --- a/tests/test.c +++ b/tests/test.c @@ -6036,9 +6036,10 @@ int test_bignum_montmul_p256k1_alt(void) return 0; } -int test_bignum_montmul_p384(void) +int test_bignum_montmul_p384_specific(const char *name, + void (*f)(uint64_t *z, uint64_t *x, uint64_t *y)) { uint64_t t; - printf("Testing bignum_montmul_p384 with %d cases\n",tests); + printf("Testing %s with %d cases\n",name,tests); int c; for (t = 0; t < tests; ++t) @@ -6046,7 +6047,7 @@ int test_bignum_montmul_p384(void) reference_mod(6,b0,b2,p_384); random_bignum(6,b2); reference_mod(6,b1,b2,p_384); - bignum_montmul_p384(b4,b0,b1); + f(b4,b0,b1); reference_dmontmul(6,b3,b0,b1,p_384,i_384,b5); c = reference_compare(6,b3,6,b4); @@ -6068,40 +6069,28 @@ int test_bignum_montmul_p384(void) return 0; } -int test_bignum_montmul_p384_alt(void) -{ uint64_t t; - printf("Testing bignum_montmul_p384_alt with %d cases\n",tests); +int test_bignum_montmul_p384(void) { + return test_bignum_montmul_p384_specific("bignum_montmul_p384", + bignum_montmul_p384); +} - int c; - for (t = 0; t < tests; ++t) - { random_bignum(6,b2); - reference_mod(6,b0,b2,p_384); - random_bignum(6,b2); - reference_mod(6,b1,b2,p_384); - bignum_montmul_p384_alt(b4,b0,b1); - reference_dmontmul(6,b3,b0,b1,p_384,i_384,b5); +int test_bignum_montmul_p384_alt(void) { + return test_bignum_montmul_p384_specific("bignum_montmul_p384_alt", + bignum_montmul_p384_alt); +} - c = reference_compare(6,b3,6,b4); - if (c != 0) - { printf("### Disparity: [size %4"PRIu64"] " - "2^-384 * ...0x%016"PRIx64" * ...%016"PRIx64" mod p_384 = " - "0x%016"PRIx64"...%016"PRIx64" not 0x%016"PRIx64"...%016"PRIx64"\n", - UINT64_C(6),b0[0],b1[0],b4[5],b4[0],b3[5],b3[0]); - return 1; - } - else if (VERBOSE) - { printf("OK: [size %4"PRIu64"] " - "2^-384 * ...0x%016"PRIx64" * ...%016"PRIx64" mod p_384 = " - "0x%016"PRIx64"...%016"PRIx64"\n", - UINT64_C(6),b0[0],b1[0],b4[5],b4[0]); - } - } - printf("All OK\n"); - return 0; +int test_bignum_montmul_p384_neon(void) { +#ifdef __x86_64__ + // Do not call the neon function to avoid a linking failure error. + return 1; +#else + return test_bignum_montmul_p384_specific("bignum_montmul_p384_neon", + bignum_montmul_p384_neon); +#endif } -int test_bignum_montmul_p521(void) -{ uint64_t t; +int test_bignum_montmul_p521(void) { + uint64_t t; printf("Testing bignum_montmul_p521 with %d cases\n",tests); int c; @@ -6433,15 +6422,16 @@ int test_bignum_montsqr_p256k1_alt(void) return 0; } -int test_bignum_montsqr_p384(void) +int test_bignum_montsqr_p384_specific(const char *name, + void (*f)(uint64_t *z, uint64_t *x)) { uint64_t t; - printf("Testing bignum_montsqr_p384 with %d cases\n",tests); + printf("Testing %s with %d cases\n",name,tests); int c; for (t = 0; t < tests; ++t) { random_bignum(6,b2); reference_mod(6,b0,b2,p_384); - bignum_montsqr_p384(b4,b0); + f(b4,b0); reference_dmontmul(6,b3,b0,b0,p_384,i_384,b5); c = reference_compare(6,b3,6,b4); @@ -6463,34 +6453,24 @@ int test_bignum_montsqr_p384(void) return 0; } -int test_bignum_montsqr_p384_alt(void) -{ uint64_t t; - printf("Testing bignum_montsqr_p384_alt with %d cases\n",tests); +int test_bignum_montsqr_p384(void) { + return test_bignum_montsqr_p384_specific("bignum_montsqr_p384", + bignum_montsqr_p384); +} - int c; - for (t = 0; t < tests; ++t) - { random_bignum(6,b2); - reference_mod(6,b0,b2,p_384); - bignum_montsqr_p384_alt(b4,b0); - reference_dmontmul(6,b3,b0,b0,p_384,i_384,b5); +int test_bignum_montsqr_p384_alt(void) { + return test_bignum_montsqr_p384_specific("bignum_montsqr_p384_alt", + bignum_montsqr_p384_alt); +} - c = reference_compare(6,b3,6,b4); - if (c != 0) - { printf("### Disparity: [size %4"PRIu64"] " - "2^-384 * ...0x%016"PRIx64"^2 mod p_384 = " - "0x%016"PRIx64"...%016"PRIx64" not 0x%016"PRIx64"...%016"PRIx64"\n", - UINT64_C(6),b0[0],b4[5],b4[0],b3[5],b3[0]); - return 1; - } - else if (VERBOSE) - { printf("OK: [size %4"PRIu64"] " - "2^-384 * ...0x%016"PRIx64"^2 mod p_384 = " - "0x%016"PRIx64"...%016"PRIx64"\n", - UINT64_C(6),b0[0],b4[5],b4[0]); - } - } - printf("All OK\n"); - return 0; +int test_bignum_montsqr_p384_neon(void) { +#ifdef __x86_64__ + // Do not call the neon function to avoid a linking failure error. + return 1; +#else + return test_bignum_montsqr_p384_specific("bignum_montsqr_p384_neon", + bignum_montsqr_p384_neon); +#endif } int test_bignum_montsqr_p521(void) @@ -12530,7 +12510,9 @@ int main(int argc, char *argv[]) functionaltest(all,"bignum_ksqr_16_32_neon",test_bignum_ksqr_16_32_neon); functionaltest(all,"bignum_ksqr_32_64_neon",test_bignum_ksqr_32_64_neon); functionaltest(all,"bignum_montmul_p256_neon", test_bignum_montmul_p256_neon); + functionaltest(all,"bignum_montmul_p384_neon", test_bignum_montmul_p384_neon); functionaltest(all,"bignum_montsqr_p256_neon", test_bignum_montsqr_p256_neon); + functionaltest(all,"bignum_montsqr_p384_neon", test_bignum_montsqr_p384_neon); functionaltest(all,"bignum_mul_8_16_neon",test_bignum_mul_8_16_neon); functionaltest(all,"bignum_sqr_8_16_neon",test_bignum_sqr_8_16_neon); }