mirror of
https://github.com/golang/go.git
synced 2026-06-28 03:40:37 +00:00
crypto/internal/fips140/nistec: optimize P-256 scalar fiat implementation
This reduces the regression of CL 669895 by about half on arm64.
host: linux-amd64_c2s16
3227c963a3c 12743b9f2c8 ccb659cd879 9a11e9167c3
sec/op vs base vs base vs base
Sign/P256-16 33.38µ +5.23% +5.04% +5.41%
Verify/P256-16 74.11µ +2.54% +2.49% +2.69%
GenerateKey/P256-16 14.61µ ~ ~ ~
geomean 33.06µ +2.50% +2.41% +2.63%
B/op vs base vs base vs base
Sign/P256-16 5.922Ki -0.53% -0.53% -0.53%
Verify/P256-16 576.0 -5.56% -5.56% -5.56%
GenerateKey/P256-16 984.0 ~ ~ ~
geomean 1.474Ki -2.06% -2.06% -2.06%
allocs/op vs base vs base vs base
Sign/P256-16 59.00 -1.69% -1.69% -1.69%
Verify/P256-16 10.00 -10.00% -10.00% -10.00%
GenerateKey/P256-16 16.00 ~ ~ ~
geomean 21.13 -4.00% -4.00% -4.00%
host: linux-arm64_c4as16
3227c963a3c 12743b9f2c8 ccb659cd879 9a11e9167c3
sec/op vs base vs base vs base
Sign/P256-16 29.29µ +8.88% +8.94% +5.41%
Verify/P256-16 69.25µ +3.52% +3.48% +2.21%
GenerateKey/P256-16 15.17µ ~ ~ ~
geomean 31.34µ +4.05% +3.97% +2.51%
B/op vs base vs base vs base
Sign/P256-16 5.922Ki -0.53% -0.53% -0.53%
Verify/P256-16 576.0 -5.56% -5.56% -5.56%
GenerateKey/P256-16 984.0 ~ ~ ~
geomean 1.474Ki -2.06% -2.06% -2.06%
allocs/op vs base vs base vs base
Sign/P256-16 59.00 -1.69% -1.69% -1.69%
Verify/P256-16 10.00 -10.00% -10.00% -10.00%
GenerateKey/P256-16 16.00 ~ ~ ~
geomean 21.13 -4.00% -4.00% -4.00%
Change-Id: I69adc8175acf0082dca7c8a13d5f62046a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/749141
Auto-Submit: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Neal Patel <neal@golang.org>
Reviewed-by: Roland Shoemaker <roland@golang.org>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Neal Patel <nealpatel@google.com>
This commit is contained in:
parent
e4e6887cee
commit
2c659bb4db
1 changed files with 147 additions and 272 deletions
|
|
@ -71,14 +71,8 @@ func P256OrdInverse(k *[4]uint64) {
|
|||
p256OrdFromMontgomery(j, x)
|
||||
}
|
||||
|
||||
func p256OrdSqr(out1, arg1 *p256OrdMontElement, n int) {
|
||||
p256OrdSquare(out1, arg1)
|
||||
for range n - 1 {
|
||||
p256OrdSquare(out1, out1)
|
||||
}
|
||||
}
|
||||
|
||||
// The code below was generated by Fiat Cryptography v0.1.6-63-g92ee794c2.
|
||||
// The code below was generated by Fiat Cryptography v0.1.6-63-g92ee794c2, and
|
||||
// then manually formatted and optimized.
|
||||
//
|
||||
// word-by-word-montgomery --lang Go --no-wide-int
|
||||
// --relax-primitive-carry-to-bitwidth 32,64 --cmovznz-by-mul --static
|
||||
|
|
@ -112,60 +106,12 @@ func p256OrdSqr(out1, arg1 *p256OrdMontElement, n int) {
|
|||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
|
||||
// Autogenerated: fiat_crypto.js word-by-word-montgomery --lang Go --no-wide-int --relax-primitive-carry-to-bitwidth 32,64 --cmovznz-by-mul --static --package-case flatcase --private-function-case camelCase --private-type-case camelCase --no-prefix-fiat --package-name nistec p256Ord 64 2^256-2^224+2^192-89188191075325690597107910205041859247 mul square from_montgomery to_montgomery
|
||||
//
|
||||
// curve description: p256Ord
|
||||
//
|
||||
// machine_wordsize = 64 (from "64")
|
||||
//
|
||||
// requested operations: mul, square, from_montgomery, to_montgomery
|
||||
//
|
||||
// m = 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551 (from "2^256-2^224+2^192-89188191075325690597107910205041859247")
|
||||
//
|
||||
//
|
||||
//
|
||||
// NOTE: In addition to the bounds specified above each function, all
|
||||
//
|
||||
// functions synthesized for this Montgomery arithmetic require the
|
||||
//
|
||||
// input to be strictly less than the prime modulus (m), and also
|
||||
//
|
||||
// require the input to be in the unique saturated representation.
|
||||
//
|
||||
// All functions also ensure that these two properties are true of
|
||||
//
|
||||
// return values.
|
||||
//
|
||||
//
|
||||
//
|
||||
// Computed values:
|
||||
//
|
||||
// eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192)
|
||||
//
|
||||
// bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248)
|
||||
//
|
||||
// twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in
|
||||
//
|
||||
// if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256
|
||||
|
||||
type p256OrdUint1 = uint64 // We use uint64 instead of a more narrow type for performance reasons; see https://github.com/mit-plv/fiat-crypto/pull/1006#issuecomment-892625927
|
||||
|
||||
// The type p256OrdMontElement is a field element in the Montgomery domain.
|
||||
//
|
||||
// Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
|
||||
// p256OrdMontElement is a scalar field element in the Montgomery domain, as
|
||||
// four uint64 limbs in little-endian order. It must be strictly less than
|
||||
// ord(G) and in Montgomery form (with R = 2²⁵⁶).
|
||||
type p256OrdMontElement [4]uint64
|
||||
|
||||
// The function p256OrdMul multiplies two field elements in the Montgomery domain.
|
||||
//
|
||||
// Preconditions:
|
||||
//
|
||||
// 0 ≤ eval arg1 < m
|
||||
// 0 ≤ eval arg2 < m
|
||||
//
|
||||
// Postconditions:
|
||||
//
|
||||
// eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
|
||||
// 0 ≤ eval out1 < m
|
||||
// p256OrdMul multiplies two field elements in the Montgomery domain.
|
||||
func p256OrdMul(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, arg2 *p256OrdMontElement) {
|
||||
x1 := arg1[1]
|
||||
x2 := arg1[2]
|
||||
|
|
@ -282,170 +228,144 @@ func p256OrdMul(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, arg2 *p256Or
|
|||
x210, x211 := bits.Sub64(x201, 0xffffffffffffffff, x209)
|
||||
x212, x213 := bits.Sub64(x203, 0xffffffff00000000, x211)
|
||||
_, x215 := bits.Sub64(x205, 0, x213)
|
||||
var x216 uint64
|
||||
p256OrdCmovznzU64(&x216, x215, x206, x197)
|
||||
var x217 uint64
|
||||
p256OrdCmovznzU64(&x217, x215, x208, x199)
|
||||
var x218 uint64
|
||||
p256OrdCmovznzU64(&x218, x215, x210, x201)
|
||||
var x219 uint64
|
||||
p256OrdCmovznzU64(&x219, x215, x212, x203)
|
||||
out1[0] = x216
|
||||
out1[1] = x217
|
||||
out1[2] = x218
|
||||
out1[3] = x219
|
||||
mask, _ := bits.Sub64(0, 0, x215)
|
||||
out1[0] = x206&^mask | x197&mask
|
||||
out1[1] = x208&^mask | x199&mask
|
||||
out1[2] = x210&^mask | x201&mask
|
||||
out1[3] = x212&^mask | x203&mask
|
||||
}
|
||||
|
||||
// The function p256OrdSquare squares a field element in the Montgomery domain.
|
||||
//
|
||||
// Preconditions:
|
||||
//
|
||||
// 0 ≤ eval arg1 < m
|
||||
//
|
||||
// Postconditions:
|
||||
//
|
||||
// eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
|
||||
// 0 ≤ eval out1 < m
|
||||
func p256OrdSquare(out1 *p256OrdMontElement, arg1 *p256OrdMontElement) {
|
||||
// p256OrdSqr squares a field element in the Montgomery domain n times in a row, storing the result of the n successive squarings of arg1 in out1.
|
||||
func p256OrdSqr(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, n int) {
|
||||
x1 := arg1[1]
|
||||
x2 := arg1[2]
|
||||
x3 := arg1[3]
|
||||
x4 := arg1[0]
|
||||
x6, x5 := bits.Mul64(x4, arg1[3])
|
||||
x8, x7 := bits.Mul64(x4, arg1[2])
|
||||
x10, x9 := bits.Mul64(x4, arg1[1])
|
||||
x12, x11 := bits.Mul64(x4, arg1[0])
|
||||
x13, x14 := bits.Add64(x12, x9, 0)
|
||||
x15, x16 := bits.Add64(x10, x7, x14)
|
||||
x17, x18 := bits.Add64(x8, x5, x16)
|
||||
x19 := x18 + x6
|
||||
_, x20 := bits.Mul64(x11, 0xccd1c8aaee00bc4f)
|
||||
x23, x22 := bits.Mul64(x20, 0xffffffff00000000)
|
||||
x25, x24 := bits.Mul64(x20, 0xffffffffffffffff)
|
||||
x27, x26 := bits.Mul64(x20, 0xbce6faada7179e84)
|
||||
x29, x28 := bits.Mul64(x20, 0xf3b9cac2fc632551)
|
||||
x30, x31 := bits.Add64(x29, x26, 0)
|
||||
x32, x33 := bits.Add64(x27, x24, x31)
|
||||
x34, x35 := bits.Add64(x25, x22, x33)
|
||||
x36 := x35 + x23
|
||||
_, x38 := bits.Add64(x11, x28, 0)
|
||||
x39, x40 := bits.Add64(x13, x30, x38)
|
||||
x41, x42 := bits.Add64(x15, x32, x40)
|
||||
x43, x44 := bits.Add64(x17, x34, x42)
|
||||
x45, x46 := bits.Add64(x19, x36, x44)
|
||||
x48, x47 := bits.Mul64(x1, arg1[3])
|
||||
x50, x49 := bits.Mul64(x1, arg1[2])
|
||||
x52, x51 := bits.Mul64(x1, arg1[1])
|
||||
x54, x53 := bits.Mul64(x1, arg1[0])
|
||||
x55, x56 := bits.Add64(x54, x51, 0)
|
||||
x57, x58 := bits.Add64(x52, x49, x56)
|
||||
x59, x60 := bits.Add64(x50, x47, x58)
|
||||
x61 := x60 + x48
|
||||
x62, x63 := bits.Add64(x39, x53, 0)
|
||||
x64, x65 := bits.Add64(x41, x55, x63)
|
||||
x66, x67 := bits.Add64(x43, x57, x65)
|
||||
x68, x69 := bits.Add64(x45, x59, x67)
|
||||
x70, x71 := bits.Add64(x46, x61, x69)
|
||||
_, x72 := bits.Mul64(x62, 0xccd1c8aaee00bc4f)
|
||||
x75, x74 := bits.Mul64(x72, 0xffffffff00000000)
|
||||
x77, x76 := bits.Mul64(x72, 0xffffffffffffffff)
|
||||
x79, x78 := bits.Mul64(x72, 0xbce6faada7179e84)
|
||||
x81, x80 := bits.Mul64(x72, 0xf3b9cac2fc632551)
|
||||
x82, x83 := bits.Add64(x81, x78, 0)
|
||||
x84, x85 := bits.Add64(x79, x76, x83)
|
||||
x86, x87 := bits.Add64(x77, x74, x85)
|
||||
x88 := x87 + x75
|
||||
_, x90 := bits.Add64(x62, x80, 0)
|
||||
x91, x92 := bits.Add64(x64, x82, x90)
|
||||
x93, x94 := bits.Add64(x66, x84, x92)
|
||||
x95, x96 := bits.Add64(x68, x86, x94)
|
||||
x97, x98 := bits.Add64(x70, x88, x96)
|
||||
x99 := x98 + x71
|
||||
x101, x100 := bits.Mul64(x2, arg1[3])
|
||||
x103, x102 := bits.Mul64(x2, arg1[2])
|
||||
x105, x104 := bits.Mul64(x2, arg1[1])
|
||||
x107, x106 := bits.Mul64(x2, arg1[0])
|
||||
x108, x109 := bits.Add64(x107, x104, 0)
|
||||
x110, x111 := bits.Add64(x105, x102, x109)
|
||||
x112, x113 := bits.Add64(x103, x100, x111)
|
||||
x114 := x113 + x101
|
||||
x115, x116 := bits.Add64(x91, x106, 0)
|
||||
x117, x118 := bits.Add64(x93, x108, x116)
|
||||
x119, x120 := bits.Add64(x95, x110, x118)
|
||||
x121, x122 := bits.Add64(x97, x112, x120)
|
||||
x123, x124 := bits.Add64(x99, x114, x122)
|
||||
_, x125 := bits.Mul64(x115, 0xccd1c8aaee00bc4f)
|
||||
x128, x127 := bits.Mul64(x125, 0xffffffff00000000)
|
||||
x130, x129 := bits.Mul64(x125, 0xffffffffffffffff)
|
||||
x132, x131 := bits.Mul64(x125, 0xbce6faada7179e84)
|
||||
x134, x133 := bits.Mul64(x125, 0xf3b9cac2fc632551)
|
||||
x135, x136 := bits.Add64(x134, x131, 0)
|
||||
x137, x138 := bits.Add64(x132, x129, x136)
|
||||
x139, x140 := bits.Add64(x130, x127, x138)
|
||||
x141 := x140 + x128
|
||||
_, x143 := bits.Add64(x115, x133, 0)
|
||||
x144, x145 := bits.Add64(x117, x135, x143)
|
||||
x146, x147 := bits.Add64(x119, x137, x145)
|
||||
x148, x149 := bits.Add64(x121, x139, x147)
|
||||
x150, x151 := bits.Add64(x123, x141, x149)
|
||||
x152 := x151 + x124
|
||||
x154, x153 := bits.Mul64(x3, arg1[3])
|
||||
x156, x155 := bits.Mul64(x3, arg1[2])
|
||||
x158, x157 := bits.Mul64(x3, arg1[1])
|
||||
x160, x159 := bits.Mul64(x3, arg1[0])
|
||||
x161, x162 := bits.Add64(x160, x157, 0)
|
||||
x163, x164 := bits.Add64(x158, x155, x162)
|
||||
x165, x166 := bits.Add64(x156, x153, x164)
|
||||
x167 := x166 + x154
|
||||
x168, x169 := bits.Add64(x144, x159, 0)
|
||||
x170, x171 := bits.Add64(x146, x161, x169)
|
||||
x172, x173 := bits.Add64(x148, x163, x171)
|
||||
x174, x175 := bits.Add64(x150, x165, x173)
|
||||
x176, x177 := bits.Add64(x152, x167, x175)
|
||||
_, x178 := bits.Mul64(x168, 0xccd1c8aaee00bc4f)
|
||||
x181, x180 := bits.Mul64(x178, 0xffffffff00000000)
|
||||
x183, x182 := bits.Mul64(x178, 0xffffffffffffffff)
|
||||
x185, x184 := bits.Mul64(x178, 0xbce6faada7179e84)
|
||||
x187, x186 := bits.Mul64(x178, 0xf3b9cac2fc632551)
|
||||
x188, x189 := bits.Add64(x187, x184, 0)
|
||||
x190, x191 := bits.Add64(x185, x182, x189)
|
||||
x192, x193 := bits.Add64(x183, x180, x191)
|
||||
x194 := x193 + x181
|
||||
_, x196 := bits.Add64(x168, x186, 0)
|
||||
x197, x198 := bits.Add64(x170, x188, x196)
|
||||
x199, x200 := bits.Add64(x172, x190, x198)
|
||||
x201, x202 := bits.Add64(x174, x192, x200)
|
||||
x203, x204 := bits.Add64(x176, x194, x202)
|
||||
x205 := x204 + x177
|
||||
x206, x207 := bits.Sub64(x197, 0xf3b9cac2fc632551, 0)
|
||||
x208, x209 := bits.Sub64(x199, 0xbce6faada7179e84, x207)
|
||||
x210, x211 := bits.Sub64(x201, 0xffffffffffffffff, x209)
|
||||
x212, x213 := bits.Sub64(x203, 0xffffffff00000000, x211)
|
||||
_, x215 := bits.Sub64(x205, 0, x213)
|
||||
var x216 uint64
|
||||
p256OrdCmovznzU64(&x216, x215, x206, x197)
|
||||
var x217 uint64
|
||||
p256OrdCmovznzU64(&x217, x215, x208, x199)
|
||||
var x218 uint64
|
||||
p256OrdCmovznzU64(&x218, x215, x210, x201)
|
||||
var x219 uint64
|
||||
p256OrdCmovznzU64(&x219, x215, x212, x203)
|
||||
out1[0] = x216
|
||||
out1[1] = x217
|
||||
out1[2] = x218
|
||||
out1[3] = x219
|
||||
for range n {
|
||||
x6, x5 := bits.Mul64(x4, x3)
|
||||
x8, x7 := bits.Mul64(x4, x2)
|
||||
x10, x9 := bits.Mul64(x4, x1)
|
||||
x12, x11 := bits.Mul64(x4, x4)
|
||||
x13, x14 := bits.Add64(x12, x9, 0)
|
||||
x15, x16 := bits.Add64(x10, x7, x14)
|
||||
x17, x18 := bits.Add64(x8, x5, x16)
|
||||
x19 := x18 + x6
|
||||
_, x20 := bits.Mul64(x11, 0xccd1c8aaee00bc4f)
|
||||
x23, x22 := bits.Mul64(x20, 0xffffffff00000000)
|
||||
x25, x24 := bits.Mul64(x20, 0xffffffffffffffff)
|
||||
x27, x26 := bits.Mul64(x20, 0xbce6faada7179e84)
|
||||
x29, x28 := bits.Mul64(x20, 0xf3b9cac2fc632551)
|
||||
x30, x31 := bits.Add64(x29, x26, 0)
|
||||
x32, x33 := bits.Add64(x27, x24, x31)
|
||||
x34, x35 := bits.Add64(x25, x22, x33)
|
||||
x36 := x35 + x23
|
||||
_, x38 := bits.Add64(x11, x28, 0)
|
||||
x39, x40 := bits.Add64(x13, x30, x38)
|
||||
x41, x42 := bits.Add64(x15, x32, x40)
|
||||
x43, x44 := bits.Add64(x17, x34, x42)
|
||||
x45, x46 := bits.Add64(x19, x36, x44)
|
||||
x48, x47 := bits.Mul64(x1, x3)
|
||||
x50, x49 := bits.Mul64(x1, x2)
|
||||
x52, x51 := bits.Mul64(x1, x1)
|
||||
x54, x53 := bits.Mul64(x1, x4)
|
||||
x55, x56 := bits.Add64(x54, x51, 0)
|
||||
x57, x58 := bits.Add64(x52, x49, x56)
|
||||
x59, x60 := bits.Add64(x50, x47, x58)
|
||||
x61 := x60 + x48
|
||||
x62, x63 := bits.Add64(x39, x53, 0)
|
||||
x64, x65 := bits.Add64(x41, x55, x63)
|
||||
x66, x67 := bits.Add64(x43, x57, x65)
|
||||
x68, x69 := bits.Add64(x45, x59, x67)
|
||||
x70, x71 := bits.Add64(x46, x61, x69)
|
||||
_, x72 := bits.Mul64(x62, 0xccd1c8aaee00bc4f)
|
||||
x75, x74 := bits.Mul64(x72, 0xffffffff00000000)
|
||||
x77, x76 := bits.Mul64(x72, 0xffffffffffffffff)
|
||||
x79, x78 := bits.Mul64(x72, 0xbce6faada7179e84)
|
||||
x81, x80 := bits.Mul64(x72, 0xf3b9cac2fc632551)
|
||||
x82, x83 := bits.Add64(x81, x78, 0)
|
||||
x84, x85 := bits.Add64(x79, x76, x83)
|
||||
x86, x87 := bits.Add64(x77, x74, x85)
|
||||
x88 := x87 + x75
|
||||
_, x90 := bits.Add64(x62, x80, 0)
|
||||
x91, x92 := bits.Add64(x64, x82, x90)
|
||||
x93, x94 := bits.Add64(x66, x84, x92)
|
||||
x95, x96 := bits.Add64(x68, x86, x94)
|
||||
x97, x98 := bits.Add64(x70, x88, x96)
|
||||
x99 := x98 + x71
|
||||
x101, x100 := bits.Mul64(x2, x3)
|
||||
x103, x102 := bits.Mul64(x2, x2)
|
||||
x105, x104 := bits.Mul64(x2, x1)
|
||||
x107, x106 := bits.Mul64(x2, x4)
|
||||
x108, x109 := bits.Add64(x107, x104, 0)
|
||||
x110, x111 := bits.Add64(x105, x102, x109)
|
||||
x112, x113 := bits.Add64(x103, x100, x111)
|
||||
x114 := x113 + x101
|
||||
x115, x116 := bits.Add64(x91, x106, 0)
|
||||
x117, x118 := bits.Add64(x93, x108, x116)
|
||||
x119, x120 := bits.Add64(x95, x110, x118)
|
||||
x121, x122 := bits.Add64(x97, x112, x120)
|
||||
x123, x124 := bits.Add64(x99, x114, x122)
|
||||
_, x125 := bits.Mul64(x115, 0xccd1c8aaee00bc4f)
|
||||
x128, x127 := bits.Mul64(x125, 0xffffffff00000000)
|
||||
x130, x129 := bits.Mul64(x125, 0xffffffffffffffff)
|
||||
x132, x131 := bits.Mul64(x125, 0xbce6faada7179e84)
|
||||
x134, x133 := bits.Mul64(x125, 0xf3b9cac2fc632551)
|
||||
x135, x136 := bits.Add64(x134, x131, 0)
|
||||
x137, x138 := bits.Add64(x132, x129, x136)
|
||||
x139, x140 := bits.Add64(x130, x127, x138)
|
||||
x141 := x140 + x128
|
||||
_, x143 := bits.Add64(x115, x133, 0)
|
||||
x144, x145 := bits.Add64(x117, x135, x143)
|
||||
x146, x147 := bits.Add64(x119, x137, x145)
|
||||
x148, x149 := bits.Add64(x121, x139, x147)
|
||||
x150, x151 := bits.Add64(x123, x141, x149)
|
||||
x152 := x151 + x124
|
||||
x154, x153 := bits.Mul64(x3, x3)
|
||||
x156, x155 := bits.Mul64(x3, x2)
|
||||
x158, x157 := bits.Mul64(x3, x1)
|
||||
x160, x159 := bits.Mul64(x3, x4)
|
||||
x161, x162 := bits.Add64(x160, x157, 0)
|
||||
x163, x164 := bits.Add64(x158, x155, x162)
|
||||
x165, x166 := bits.Add64(x156, x153, x164)
|
||||
x167 := x166 + x154
|
||||
x168, x169 := bits.Add64(x144, x159, 0)
|
||||
x170, x171 := bits.Add64(x146, x161, x169)
|
||||
x172, x173 := bits.Add64(x148, x163, x171)
|
||||
x174, x175 := bits.Add64(x150, x165, x173)
|
||||
x176, x177 := bits.Add64(x152, x167, x175)
|
||||
_, x178 := bits.Mul64(x168, 0xccd1c8aaee00bc4f)
|
||||
x181, x180 := bits.Mul64(x178, 0xffffffff00000000)
|
||||
x183, x182 := bits.Mul64(x178, 0xffffffffffffffff)
|
||||
x185, x184 := bits.Mul64(x178, 0xbce6faada7179e84)
|
||||
x187, x186 := bits.Mul64(x178, 0xf3b9cac2fc632551)
|
||||
x188, x189 := bits.Add64(x187, x184, 0)
|
||||
x190, x191 := bits.Add64(x185, x182, x189)
|
||||
x192, x193 := bits.Add64(x183, x180, x191)
|
||||
x194 := x193 + x181
|
||||
_, x196 := bits.Add64(x168, x186, 0)
|
||||
x197, x198 := bits.Add64(x170, x188, x196)
|
||||
x199, x200 := bits.Add64(x172, x190, x198)
|
||||
x201, x202 := bits.Add64(x174, x192, x200)
|
||||
x203, x204 := bits.Add64(x176, x194, x202)
|
||||
x205 := x204 + x177
|
||||
x206, x207 := bits.Sub64(x197, 0xf3b9cac2fc632551, 0)
|
||||
x208, x209 := bits.Sub64(x199, 0xbce6faada7179e84, x207)
|
||||
x210, x211 := bits.Sub64(x201, 0xffffffffffffffff, x209)
|
||||
x212, x213 := bits.Sub64(x203, 0xffffffff00000000, x211)
|
||||
_, x215 := bits.Sub64(x205, 0, x213)
|
||||
mask, _ := bits.Sub64(0, 0, x215)
|
||||
x4 = x206&^mask | x197&mask
|
||||
x1 = x208&^mask | x199&mask
|
||||
x2 = x210&^mask | x201&mask
|
||||
x3 = x212&^mask | x203&mask
|
||||
}
|
||||
out1[0] = x4
|
||||
out1[1] = x1
|
||||
out1[2] = x2
|
||||
out1[3] = x3
|
||||
}
|
||||
|
||||
// The function p256OrdFromMontgomery translates a field element out of the Montgomery domain.
|
||||
//
|
||||
// Preconditions:
|
||||
//
|
||||
// 0 ≤ eval arg1 < m
|
||||
//
|
||||
// Postconditions:
|
||||
//
|
||||
// eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m
|
||||
// 0 ≤ eval out1 < m
|
||||
// p256OrdFromMontgomery translates a field element out of the Montgomery domain.
|
||||
func p256OrdFromMontgomery(out1 *p256OrdElement, arg1 *p256OrdMontElement) {
|
||||
x1 := arg1[0]
|
||||
_, x2 := bits.Mul64(x1, 0xccd1c8aaee00bc4f)
|
||||
|
|
@ -510,31 +430,14 @@ func p256OrdFromMontgomery(out1 *p256OrdElement, arg1 *p256OrdMontElement) {
|
|||
x119, x120 := bits.Sub64(x112, 0xbce6faada7179e84, x118)
|
||||
x121, x122 := bits.Sub64(x114, 0xffffffffffffffff, x120)
|
||||
x123, x124 := bits.Sub64(x116, 0xffffffff00000000, x122)
|
||||
_, x126 := bits.Sub64(0, 0, x124)
|
||||
var x127 uint64
|
||||
p256OrdCmovznzU64(&x127, x126, x117, x110)
|
||||
var x128 uint64
|
||||
p256OrdCmovznzU64(&x128, x126, x119, x112)
|
||||
var x129 uint64
|
||||
p256OrdCmovznzU64(&x129, x126, x121, x114)
|
||||
var x130 uint64
|
||||
p256OrdCmovznzU64(&x130, x126, x123, x116)
|
||||
out1[0] = x127
|
||||
out1[1] = x128
|
||||
out1[2] = x129
|
||||
out1[3] = x130
|
||||
mask, _ := bits.Sub64(0, 0, x124)
|
||||
out1[0] = x117&^mask | x110&mask
|
||||
out1[1] = x119&^mask | x112&mask
|
||||
out1[2] = x121&^mask | x114&mask
|
||||
out1[3] = x123&^mask | x116&mask
|
||||
}
|
||||
|
||||
// The function p256OrdToMontgomery translates a field element into the Montgomery domain.
|
||||
//
|
||||
// Preconditions:
|
||||
//
|
||||
// 0 ≤ eval arg1 < m
|
||||
//
|
||||
// Postconditions:
|
||||
//
|
||||
// eval (from_montgomery out1) mod m = eval arg1 mod m
|
||||
// 0 ≤ eval out1 < m
|
||||
// p256OrdToMontgomery translates a field element into the Montgomery domain.
|
||||
func p256OrdToMontgomery(out1 *p256OrdMontElement, arg1 *p256OrdElement) {
|
||||
x1 := arg1[1]
|
||||
x2 := arg1[2]
|
||||
|
|
@ -637,37 +540,9 @@ func p256OrdToMontgomery(out1 *p256OrdMontElement, arg1 *p256OrdElement) {
|
|||
x193, x194 := bits.Sub64(x185, 0xffffffffffffffff, x192)
|
||||
x195, x196 := bits.Sub64(x187, 0xffffffff00000000, x194)
|
||||
_, x198 := bits.Sub64(x188, 0, x196)
|
||||
var x199 uint64
|
||||
p256OrdCmovznzU64(&x199, x198, x189, x181)
|
||||
var x200 uint64
|
||||
p256OrdCmovznzU64(&x200, x198, x191, x183)
|
||||
var x201 uint64
|
||||
p256OrdCmovznzU64(&x201, x198, x193, x185)
|
||||
var x202 uint64
|
||||
p256OrdCmovznzU64(&x202, x198, x195, x187)
|
||||
out1[0] = x199
|
||||
out1[1] = x200
|
||||
out1[2] = x201
|
||||
out1[3] = x202
|
||||
}
|
||||
|
||||
// The function p256OrdCmovznzU64 is a single-word conditional move.
|
||||
//
|
||||
// Postconditions:
|
||||
//
|
||||
// out1 = (if arg1 = 0 then arg2 else arg3)
|
||||
//
|
||||
// Input Bounds:
|
||||
//
|
||||
// arg1: [0x0 ~> 0x1]
|
||||
// arg2: [0x0 ~> 0xffffffffffffffff]
|
||||
// arg3: [0x0 ~> 0xffffffffffffffff]
|
||||
//
|
||||
// Output Bounds:
|
||||
//
|
||||
// out1: [0x0 ~> 0xffffffffffffffff]
|
||||
func p256OrdCmovznzU64(out1 *uint64, arg1 uint64, arg2 uint64, arg3 uint64) {
|
||||
x1 := arg1 * 0xffffffffffffffff
|
||||
x2 := x1&arg3 | ^x1&arg2
|
||||
*out1 = x2
|
||||
mask, _ := bits.Sub64(0, 0, x198)
|
||||
out1[0] = x189&^mask | x181&mask
|
||||
out1[1] = x191&^mask | x183&mask
|
||||
out1[2] = x193&^mask | x185&mask
|
||||
out1[3] = x195&^mask | x187&mask
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue