mirror of
https://github.com/golang/go.git
synced 2026-06-27 19:30:52 +00:00
runtime/,internal/runtime/maps: move hashing function implemented in GOASM to maps package
This is groundwork for future CLs that would rewrite the GOASM implementations using SIMD intrinsics. This will allow inlining of the hashing functions into map functions, improving their performance. Change-Id: I5aef1da0d11a9308ca0a22900671f6f47dc820a8 Reviewed-on: https://go-review.googlesource.com/c/go/+/770581 Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Keith Randall <khr@google.com> LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
464dc3f344
commit
8594bf4621
34 changed files with 1526 additions and 1519 deletions
|
|
@ -51,7 +51,6 @@ func TestIntendedInlining(t *testing.T) {
|
|||
"noescape",
|
||||
"pcvalueCacheKey",
|
||||
"rand32",
|
||||
"readUnaligned32",
|
||||
"readUnaligned64",
|
||||
"releasem",
|
||||
"roundupsize",
|
||||
|
|
@ -113,6 +112,10 @@ func TestIntendedInlining(t *testing.T) {
|
|||
"internal/runtime/math": {
|
||||
"MulUintptr",
|
||||
},
|
||||
"internal/runtime/maps": {
|
||||
"readUnaligned32",
|
||||
"readUnaligned64",
|
||||
},
|
||||
"internal/runtime/sys": {},
|
||||
"compress/flate": {
|
||||
"(*dictDecoder).tryWriteCopy",
|
||||
|
|
@ -262,7 +265,7 @@ func TestIntendedInlining(t *testing.T) {
|
|||
}
|
||||
if bits.UintSize == 64 {
|
||||
// mix is only defined on 64-bit architectures
|
||||
want["runtime"] = append(want["runtime"], "mix")
|
||||
want["internal/runtime/maps"] = append(want["internal/runtime/maps"], "mix")
|
||||
// (*Bool).CompareAndSwap is just over budget on 32-bit systems (386, arm).
|
||||
want["sync/atomic"] = append(want["sync/atomic"], "(*Bool).CompareAndSwap")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ var allowAsmABIPkgs = []string{
|
|||
"internal/runtime/syscall/linux",
|
||||
"internal/runtime/syscall/windows",
|
||||
"internal/runtime/startlinetest",
|
||||
"internal/runtime/maps",
|
||||
}
|
||||
|
||||
// LookupPkgSpecial returns special build properties for the given package path.
|
||||
|
|
|
|||
431
src/internal/runtime/maps/memhash_386.s
Normal file
431
src/internal/runtime/maps/memhash_386.s
Normal file
|
|
@ -0,0 +1,431 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// hash function using AES hardware instructions
// func MemHash32(p unsafe.Pointer, h uintptr) uintptr
// Hashes the 32-bit value at p with seed h and stores the low 32 bits
// of the scrambled state in ret. When ·UseAeshash is zero it tail-jumps
// to ·memHash32Fallback instead.
|
||||
TEXT ·MemHash32(SB),NOSPLIT,$0-12
|
||||
CMPB ·UseAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVL p+0(FP), AX // ptr to data
|
||||
MOVL h+4(FP), X0 // seed
|
||||
PINSRD $1, (AX), X0 // data
|
||||
// three AES rounds, using successive 16-byte chunks of ·aeskeysched as round keys
AESENC ·aeskeysched+0(SB), X0
|
||||
AESENC ·aeskeysched+16(SB), X0
|
||||
AESENC ·aeskeysched+32(SB), X0
|
||||
MOVL X0, ret+8(FP) // low 32 bits of X0 are the hash
|
||||
RET
|
||||
noaes:
|
||||
JMP ·memHash32Fallback(SB)
|
||||
|
||||
// func MemHash64(p unsafe.Pointer, h uintptr) uintptr
// Hashes the 64-bit value at p with seed h and stores the low 32 bits
// of the scrambled state in ret. When ·UseAeshash is zero it tail-jumps
// to ·memHash64Fallback instead.
TEXT ·MemHash64(SB),NOSPLIT,$0-12
|
||||
CMPB ·UseAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVL p+0(FP), AX // ptr to data
|
||||
MOVQ (AX), X0 // data
|
||||
PINSRD $2, h+4(FP), X0 // seed
|
||||
// three AES rounds, using successive 16-byte chunks of ·aeskeysched as round keys
AESENC ·aeskeysched+0(SB), X0
|
||||
AESENC ·aeskeysched+16(SB), X0
|
||||
AESENC ·aeskeysched+32(SB), X0
|
||||
MOVL X0, ret+8(FP) // low 32 bits of X0 are the hash
|
||||
RET
|
||||
noaes:
|
||||
JMP ·memHash64Fallback(SB)
|
||||
|
||||
// func MemHash(p unsafe.Pointer, h, s uintptr) uintptr
// Hashes the s bytes at p with seed h. Loads the registers expected by
// aeshashbody<> and tail-jumps to it, or to ·memHashFallback when
// ·UseAeshash is zero.
TEXT ·MemHash(SB),NOSPLIT,$0-16
|
||||
CMPB ·UseAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVL p+0(FP), AX // ptr to data
|
||||
MOVL s+8(FP), BX // size
|
||||
LEAL ret+12(FP), DX // aeshashbody<> stores the result through DX
|
||||
JMP ·aeshashbody<>(SB)
|
||||
noaes:
|
||||
JMP ·memHashFallback(SB)
|
||||
|
||||
// func StrHash(p unsafe.Pointer, h uintptr) uintptr
// Hashes the string whose header p points to, with seed h. Unpacks the
// data pointer and length from the header and tail-jumps to
// aeshashbody<>, or to ·strHashFallback when ·UseAeshash is zero.
TEXT ·StrHash(SB),NOSPLIT,$0-12
|
||||
CMPB ·UseAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVL p+0(FP), AX // ptr to string object
|
||||
MOVL 4(AX), BX // length of string
|
||||
MOVL (AX), AX // string data
|
||||
LEAL ret+8(FP), DX // aeshashbody<> stores the result through DX
|
||||
JMP ·aeshashbody<>(SB)
|
||||
noaes:
|
||||
JMP ·strHashFallback(SB)
|
||||
|
||||
// aeshashbody<> is the shared AES hashing body for ·MemHash and ·StrHash,
// which reach it with JMP after loading the registers below.
// AX: data
|
||||
// BX: length
|
||||
// DX: address to put return value
|
||||
// The seed is read from h+4(FP); both entry points jump here rather than
// call, and both have h at offset 4 of their argument frame.
// The code dispatches on length: 0, 1-15, 16, 17-32, 33-64 and 65+ bytes.
// Every path stores the low 32 bits of the final state through DX.
TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
|
||||
MOVL h+4(FP), X0 // 32 bits of per-table hash seed
|
||||
PINSRW $4, BX, X0 // 16 bits of length
|
||||
PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
|
||||
MOVO X0, X1 // save unscrambled seed
|
||||
PXOR ·aeskeysched(SB), X0 // xor in per-process seed
|
||||
AESENC X0, X0 // scramble seed
|
||||
|
||||
CMPL BX, $16
|
||||
JB aes0to15 // length < 16
|
||||
JE aes16 // length == 16
|
||||
CMPL BX, $32
|
||||
JBE aes17to32
|
||||
CMPL BX, $64
|
||||
JBE aes33to64
|
||||
JMP aes65plus
|
||||
|
||||
aes0to15:
|
||||
TESTL BX, BX
|
||||
JE aes0
|
||||
|
||||
// AX now points 16 bytes past the start of the data; the test below
// checks bits 4-11 of that address.
ADDL $16, AX
|
||||
TESTW $0xff0, AX
|
||||
JE endofpage
|
||||
|
||||
// 16 bytes loaded at this address won't cross
|
||||
// a page boundary, so we can load it directly.
|
||||
MOVOU -16(AX), X1
|
||||
ADDL BX, BX // BX = 2*length, so BX*8 selects the 16-byte masks<> entry for length
|
||||
PAND masks<>(SB)(BX*8), X1
|
||||
|
||||
// final1 expects the (up to 16 bytes of) data in X1 and the scrambled seed in X0.
final1:
|
||||
PXOR X0, X1 // xor data with seed
|
||||
AESENC X1, X1 // scramble combo 3 times
|
||||
AESENC X1, X1
|
||||
AESENC X1, X1
|
||||
MOVL X1, (DX)
|
||||
RET
|
||||
|
||||
endofpage:
|
||||
// address ends in 1111xxxx. Might be up against
|
||||
// a page boundary, so load ending at last byte.
|
||||
// Then shift bytes down using pshufb.
|
||||
MOVOU -32(AX)(BX*1), X1 // AX is data+16 here, so this loads the 16 bytes ending at data+length
|
||||
ADDL BX, BX // BX = 2*length, so BX*8 selects the 16-byte shifts<> entry for length
|
||||
PSHUFB shifts<>(SB)(BX*8), X1
|
||||
JMP final1
|
||||
|
||||
aes0:
|
||||
// Return scrambled input seed
|
||||
AESENC X0, X0
|
||||
MOVL X0, (DX)
|
||||
RET
|
||||
|
||||
aes16:
|
||||
MOVOU (AX), X1
|
||||
JMP final1
|
||||
|
||||
aes17to32:
|
||||
// make second starting seed
|
||||
PXOR ·aeskeysched+16(SB), X1
|
||||
AESENC X1, X1
|
||||
|
||||
// load data to be hashed
|
||||
// (first 16 bytes and last 16 bytes; the two loads may overlap)
MOVOU (AX), X2
|
||||
MOVOU -16(AX)(BX*1), X3
|
||||
|
||||
// xor with seed
|
||||
PXOR X0, X2
|
||||
PXOR X1, X3
|
||||
|
||||
// scramble 3 times
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
|
||||
// combine results
|
||||
PXOR X3, X2
|
||||
MOVL X2, (DX)
|
||||
RET
|
||||
|
||||
aes33to64:
|
||||
// make 3 more starting seeds
|
||||
MOVO X1, X2
|
||||
MOVO X1, X3
|
||||
PXOR ·aeskeysched+16(SB), X1
|
||||
PXOR ·aeskeysched+32(SB), X2
|
||||
PXOR ·aeskeysched+48(SB), X3
|
||||
AESENC X1, X1
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
|
||||
// load first 32 bytes and last 32 bytes (the loads may overlap)
MOVOU (AX), X4
|
||||
MOVOU 16(AX), X5
|
||||
MOVOU -32(AX)(BX*1), X6
|
||||
MOVOU -16(AX)(BX*1), X7
|
||||
|
||||
// xor each lane with its own seed
PXOR X0, X4
|
||||
PXOR X1, X5
|
||||
PXOR X2, X6
|
||||
PXOR X3, X7
|
||||
|
||||
// scramble 3 times
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
// combine results
PXOR X6, X4
|
||||
PXOR X7, X5
|
||||
PXOR X5, X4
|
||||
MOVL X4, (DX)
|
||||
RET
|
||||
|
||||
aes65plus:
|
||||
// make 3 more starting seeds
|
||||
MOVO X1, X2
|
||||
MOVO X1, X3
|
||||
PXOR ·aeskeysched+16(SB), X1
|
||||
PXOR ·aeskeysched+32(SB), X2
|
||||
PXOR ·aeskeysched+48(SB), X3
|
||||
AESENC X1, X1
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
|
||||
// start with last (possibly overlapping) block
|
||||
MOVOU -64(AX)(BX*1), X4
|
||||
MOVOU -48(AX)(BX*1), X5
|
||||
MOVOU -32(AX)(BX*1), X6
|
||||
MOVOU -16(AX)(BX*1), X7
|
||||
|
||||
// scramble state once
|
||||
AESENC X0, X4
|
||||
AESENC X1, X5
|
||||
AESENC X2, X6
|
||||
AESENC X3, X7
|
||||
|
||||
// compute number of remaining 64-byte blocks
|
||||
// BX = (length-1) >> 6; length > 64 here, so BX >= 1
DECL BX
|
||||
SHRL $6, BX
|
||||
|
||||
aesloop:
|
||||
// scramble state, xor in a block
|
||||
MOVOU (AX), X0
|
||||
MOVOU 16(AX), X1
|
||||
MOVOU 32(AX), X2
|
||||
MOVOU 48(AX), X3
|
||||
AESENC X0, X4
|
||||
AESENC X1, X5
|
||||
AESENC X2, X6
|
||||
AESENC X3, X7
|
||||
|
||||
// scramble state
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
ADDL $64, AX
|
||||
DECL BX
|
||||
JNE aesloop
|
||||
|
||||
// 3 more scrambles to finish
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
// combine results
PXOR X6, X4
|
||||
PXOR X7, X5
|
||||
PXOR X5, X4
|
||||
MOVL X4, (DX)
|
||||
RET
|
||||
|
||||
// simple mask to get rid of data in the high part of the register.
|
||||
DATA masks<>+0x00(SB)/4, $0x00000000
|
||||
DATA masks<>+0x04(SB)/4, $0x00000000
|
||||
DATA masks<>+0x08(SB)/4, $0x00000000
|
||||
DATA masks<>+0x0c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x10(SB)/4, $0x000000ff
|
||||
DATA masks<>+0x14(SB)/4, $0x00000000
|
||||
DATA masks<>+0x18(SB)/4, $0x00000000
|
||||
DATA masks<>+0x1c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x20(SB)/4, $0x0000ffff
|
||||
DATA masks<>+0x24(SB)/4, $0x00000000
|
||||
DATA masks<>+0x28(SB)/4, $0x00000000
|
||||
DATA masks<>+0x2c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x30(SB)/4, $0x00ffffff
|
||||
DATA masks<>+0x34(SB)/4, $0x00000000
|
||||
DATA masks<>+0x38(SB)/4, $0x00000000
|
||||
DATA masks<>+0x3c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x40(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x44(SB)/4, $0x00000000
|
||||
DATA masks<>+0x48(SB)/4, $0x00000000
|
||||
DATA masks<>+0x4c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x50(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x54(SB)/4, $0x000000ff
|
||||
DATA masks<>+0x58(SB)/4, $0x00000000
|
||||
DATA masks<>+0x5c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x60(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x64(SB)/4, $0x0000ffff
|
||||
DATA masks<>+0x68(SB)/4, $0x00000000
|
||||
DATA masks<>+0x6c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x70(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x74(SB)/4, $0x00ffffff
|
||||
DATA masks<>+0x78(SB)/4, $0x00000000
|
||||
DATA masks<>+0x7c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x80(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x84(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x88(SB)/4, $0x00000000
|
||||
DATA masks<>+0x8c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x90(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x94(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x98(SB)/4, $0x000000ff
|
||||
DATA masks<>+0x9c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0xa0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xa4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xa8(SB)/4, $0x0000ffff
|
||||
DATA masks<>+0xac(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0xb0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xb4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xb8(SB)/4, $0x00ffffff
|
||||
DATA masks<>+0xbc(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0xc0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xc4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xc8(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xcc(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0xd0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xd4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xd8(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xdc(SB)/4, $0x000000ff
|
||||
|
||||
DATA masks<>+0xe0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xe4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xe8(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xec(SB)/4, $0x0000ffff
|
||||
|
||||
DATA masks<>+0xf0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xf4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xf8(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xfc(SB)/4, $0x00ffffff
|
||||
|
||||
GLOBL masks<>(SB),RODATA,$256
|
||||
|
||||
// these are arguments to pshufb. They move data down from
|
||||
// the high bytes of the register to the low bytes of the register.
|
||||
// index is how many bytes to move.
|
||||
DATA shifts<>+0x00(SB)/4, $0x00000000
|
||||
DATA shifts<>+0x04(SB)/4, $0x00000000
|
||||
DATA shifts<>+0x08(SB)/4, $0x00000000
|
||||
DATA shifts<>+0x0c(SB)/4, $0x00000000
|
||||
|
||||
DATA shifts<>+0x10(SB)/4, $0xffffff0f
|
||||
DATA shifts<>+0x14(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x18(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x1c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x20(SB)/4, $0xffff0f0e
|
||||
DATA shifts<>+0x24(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x28(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x2c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
|
||||
DATA shifts<>+0x34(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x38(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x3c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
|
||||
DATA shifts<>+0x44(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x48(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x4c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
|
||||
DATA shifts<>+0x54(SB)/4, $0xffffff0f
|
||||
DATA shifts<>+0x58(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x5c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
|
||||
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
|
||||
DATA shifts<>+0x68(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x6c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
|
||||
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
|
||||
DATA shifts<>+0x78(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x7c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x80(SB)/4, $0x0b0a0908
|
||||
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
|
||||
DATA shifts<>+0x88(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x8c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x90(SB)/4, $0x0a090807
|
||||
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
|
||||
DATA shifts<>+0x98(SB)/4, $0xffffff0f
|
||||
DATA shifts<>+0x9c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0xa0(SB)/4, $0x09080706
|
||||
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
|
||||
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
|
||||
DATA shifts<>+0xac(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0xb0(SB)/4, $0x08070605
|
||||
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
|
||||
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
|
||||
DATA shifts<>+0xbc(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0xc0(SB)/4, $0x07060504
|
||||
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
|
||||
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
|
||||
DATA shifts<>+0xcc(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0xd0(SB)/4, $0x06050403
|
||||
DATA shifts<>+0xd4(SB)/4, $0x0a090807
|
||||
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
|
||||
DATA shifts<>+0xdc(SB)/4, $0xffffff0f
|
||||
|
||||
DATA shifts<>+0xe0(SB)/4, $0x05040302
|
||||
DATA shifts<>+0xe4(SB)/4, $0x09080706
|
||||
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
|
||||
DATA shifts<>+0xec(SB)/4, $0xffff0f0e
|
||||
|
||||
DATA shifts<>+0xf0(SB)/4, $0x04030201
|
||||
DATA shifts<>+0xf4(SB)/4, $0x08070605
|
||||
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
|
||||
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
|
||||
|
||||
GLOBL shifts<>(SB),RODATA,$256
|
||||
|
||||
// func checkMasksAndShiftsAlignment() bool
// Reports whether the addresses of both lookup tables are multiples of 16.
TEXT ·checkMasksAndShiftsAlignment(SB),NOSPLIT,$0-1
|
||||
// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
|
||||
MOVL $masks<>(SB), AX
|
||||
MOVL $shifts<>(SB), BX
|
||||
ORL BX, AX // a low bit set in either address survives the OR
|
||||
TESTL $15, AX
|
||||
SETEQ ret+0(FP) // true when the low 4 bits of both addresses are zero
|
||||
RET
|
||||
23
src/internal/runtime/maps/memhash_aes.go
Normal file
23
src/internal/runtime/maps/memhash_aes.go
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build amd64 || arm64 || 386
|
||||
|
||||
package maps
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// MemHash returns a hash of the s bytes starting at p, seeded with h.
// It has no Go body; it is implemented in the memhash_*.s assembly file
// for the target architecture.
//
//go:noescape
|
||||
func MemHash(p unsafe.Pointer, h, s uintptr) uintptr
|
||||
|
||||
// MemHash32 returns a hash of the 32-bit value at p, seeded with h.
// It has no Go body; it is implemented in the memhash_*.s assembly file
// for the target architecture.
//
//go:noescape
|
||||
func MemHash32(p unsafe.Pointer, h uintptr) uintptr
|
||||
|
||||
// MemHash64 returns a hash of the 64-bit value at p, seeded with h.
// It has no Go body; it is implemented in the memhash_*.s assembly file
// for the target architecture.
//
//go:noescape
|
||||
func MemHash64(p unsafe.Pointer, h uintptr) uintptr
|
||||
|
||||
// StrHash returns a hash of the string whose header p points to, seeded
// with h. The assembly reads the data pointer and then the length from
// that header. It has no Go body; it is implemented in the memhash_*.s
// assembly file for the target architecture.
//
//go:noescape
|
||||
func StrHash(p unsafe.Pointer, h uintptr) uintptr
|
||||
10
src/internal/runtime/maps/memhash_align_check.go
Normal file
10
src/internal/runtime/maps/memhash_align_check.go
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build amd64 || 386
|
||||
|
||||
package maps
|
||||
|
||||
// stub for memhash_{386,amd64}.s
// checkMasksAndShiftsAlignment reports whether the masks<> and shifts<>
// tables defined in those assembly files are both 16-byte aligned.
|
||||
func checkMasksAndShiftsAlignment() bool
|
||||
16
src/internal/runtime/maps/memhash_align_nocheck.go
Normal file
16
src/internal/runtime/maps/memhash_align_nocheck.go
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !(amd64 || 386)
|
||||
|
||||
package maps
|
||||
|
||||
// checkMasksAndShiftsAlignment is the pure-Go variant built on every
// architecture other than amd64 and 386. It always reports false.
// NOTE(review): callers are not visible here — confirm that none of them
// treats false as a fatal condition on these architectures.
func checkMasksAndShiftsAlignment() bool {
|
||||
// This check is only meaningful on amd64/386, where the AES memhash
|
||||
// implementation depends on these globals being properly aligned.
|
||||
//
|
||||
// Return false here so any accidental use on other architectures fails
|
||||
// loudly rather than silently succeeding.
|
||||
return false
|
||||
}
|
||||
481
src/internal/runtime/maps/memhash_amd64.s
Normal file
481
src/internal/runtime/maps/memhash_amd64.s
Normal file
|
|
@ -0,0 +1,481 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func MemHash32(p unsafe.Pointer, h uintptr) uintptr
|
||||
// ABIInternal for performance.
|
||||
// Hashes the 32-bit value at p with seed h. When ·UseAeshash is zero it
// tail-jumps to ·memHash32Fallback with the argument registers untouched.
TEXT ·MemHash32<ABIInternal>(SB),NOSPLIT,$0-24
|
||||
// AX = ptr to data
|
||||
// BX = seed
|
||||
CMPB ·UseAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVQ BX, X0 // X0 = seed
|
||||
PINSRD $2, (AX), X0 // data
|
||||
// three AES rounds, using successive 16-byte chunks of ·aeskeysched as round keys
AESENC ·aeskeysched+0(SB), X0
|
||||
AESENC ·aeskeysched+16(SB), X0
|
||||
AESENC ·aeskeysched+32(SB), X0
|
||||
MOVQ X0, AX // return X0
|
||||
RET
|
||||
noaes:
|
||||
JMP ·memHash32Fallback<ABIInternal>(SB)
|
||||
|
||||
// func MemHash64(p unsafe.Pointer, h uintptr) uintptr
|
||||
// ABIInternal for performance.
|
||||
// Hashes the 64-bit value at p with seed h. When ·UseAeshash is zero it
// tail-jumps to ·memHash64Fallback with the argument registers untouched.
TEXT ·MemHash64<ABIInternal>(SB),NOSPLIT,$0-24
|
||||
// AX = ptr to data
|
||||
// BX = seed
|
||||
CMPB ·UseAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVQ BX, X0 // X0 = seed
|
||||
PINSRQ $1, (AX), X0 // data
|
||||
// three AES rounds, using successive 16-byte chunks of ·aeskeysched as round keys
AESENC ·aeskeysched+0(SB), X0
|
||||
AESENC ·aeskeysched+16(SB), X0
|
||||
AESENC ·aeskeysched+32(SB), X0
|
||||
MOVQ X0, AX // return X0
|
||||
RET
|
||||
noaes:
|
||||
JMP ·memHash64Fallback<ABIInternal>(SB)
|
||||
|
||||
// func MemHash(p unsafe.Pointer, h, s uintptr) uintptr
|
||||
// hash function using AES hardware instructions
|
||||
// The argument registers already match what aeshashbody<> expects, so
// this entry point only selects between the AES body and the fallback.
TEXT ·MemHash<ABIInternal>(SB),NOSPLIT,$0-32
|
||||
// AX = ptr to data
|
||||
// BX = seed
|
||||
// CX = size
|
||||
CMPB ·UseAeshash(SB), $0
|
||||
JEQ noaes
|
||||
JMP ·aeshashbody<>(SB)
|
||||
noaes:
|
||||
JMP ·memHashFallback<ABIInternal>(SB)
|
||||
|
||||
// func StrHash(p unsafe.Pointer, h uintptr) uintptr
|
||||
// Unpacks the string header at p into the data/length registers expected
// by aeshashbody<> and tail-jumps to it, or to ·strHashFallback when
// ·UseAeshash is zero.
TEXT ·StrHash<ABIInternal>(SB),NOSPLIT,$0-24
|
||||
// AX = ptr to string struct
|
||||
// BX = seed
|
||||
CMPB ·UseAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVQ 8(AX), CX // length of string
|
||||
MOVQ (AX), AX // string data
|
||||
JMP ·aeshashbody<>(SB)
|
||||
noaes:
|
||||
JMP ·strHashFallback<ABIInternal>(SB)
|
||||
|
||||
// aeshashbody<> is the shared AES hashing body for ·MemHash and ·StrHash,
// which reach it with JMP.
// AX: data
|
||||
// BX: hash seed
|
||||
// CX: length
|
||||
// At return: AX = return value
|
||||
// The code dispatches on length: 0, 1-15, 16, 17-32, 33-64, 65-128 and
// 129+ bytes. Every path returns the low 64 bits of the final state.
TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
|
||||
// Fill an SSE register with our seeds.
|
||||
MOVQ BX, X0 // 64 bits of per-table hash seed
|
||||
PINSRW $4, CX, X0 // 16 bits of length
|
||||
PSHUFHW $0, X0, X0 // repeat length 4 times total
|
||||
MOVO X0, X1 // save unscrambled seed
|
||||
PXOR ·aeskeysched(SB), X0 // xor in per-process seed
|
||||
AESENC X0, X0 // scramble seed
|
||||
|
||||
CMPQ CX, $16
|
||||
JB aes0to15 // length < 16
|
||||
JE aes16 // length == 16
|
||||
CMPQ CX, $32
|
||||
JBE aes17to32
|
||||
CMPQ CX, $64
|
||||
JBE aes33to64
|
||||
CMPQ CX, $128
|
||||
JBE aes65to128
|
||||
JMP aes129plus
|
||||
|
||||
aes0to15:
|
||||
TESTQ CX, CX
|
||||
JE aes0
|
||||
|
||||
// AX now points 16 bytes past the start of the data; the test below
// checks bits 4-11 of that address.
ADDQ $16, AX
|
||||
TESTW $0xff0, AX
|
||||
JE endofpage
|
||||
|
||||
// 16 bytes loaded at this address won't cross
|
||||
// a page boundary, so we can load it directly.
|
||||
MOVOU -16(AX), X1
|
||||
ADDQ CX, CX // CX = 2*length, so CX*8 selects the 16-byte masks<> entry for length
|
||||
MOVQ $masks<>(SB), AX // data pointer is no longer needed; reuse AX as table base
|
||||
PAND (AX)(CX*8), X1
|
||||
// final1 expects the (up to 16 bytes of) data in X1 and the scrambled seed in X0.
final1:
|
||||
PXOR X0, X1 // xor data with seed
|
||||
AESENC X1, X1 // scramble combo 3 times
|
||||
AESENC X1, X1
|
||||
AESENC X1, X1
|
||||
MOVQ X1, AX // return X1
|
||||
RET
|
||||
|
||||
endofpage:
|
||||
// address ends in 1111xxxx. Might be up against
|
||||
// a page boundary, so load ending at last byte.
|
||||
// Then shift bytes down using pshufb.
|
||||
MOVOU -32(AX)(CX*1), X1 // AX is data+16 here, so this loads the 16 bytes ending at data+length
|
||||
ADDQ CX, CX // CX = 2*length, so CX*8 selects the 16-byte shifts<> entry for length
|
||||
MOVQ $shifts<>(SB), AX // data pointer is no longer needed; reuse AX as table base
|
||||
PSHUFB (AX)(CX*8), X1
|
||||
JMP final1
|
||||
|
||||
aes0:
|
||||
// Return scrambled input seed
|
||||
AESENC X0, X0
|
||||
MOVQ X0, AX // return X0
|
||||
RET
|
||||
|
||||
aes16:
|
||||
MOVOU (AX), X1
|
||||
JMP final1
|
||||
|
||||
aes17to32:
|
||||
// make second starting seed
|
||||
PXOR ·aeskeysched+16(SB), X1
|
||||
AESENC X1, X1
|
||||
|
||||
// load data to be hashed
|
||||
// (first 16 bytes and last 16 bytes; the two loads may overlap)
MOVOU (AX), X2
|
||||
MOVOU -16(AX)(CX*1), X3
|
||||
|
||||
// xor with seed
|
||||
PXOR X0, X2
|
||||
PXOR X1, X3
|
||||
|
||||
// scramble 3 times
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
|
||||
// combine results
|
||||
PXOR X3, X2
|
||||
MOVQ X2, AX // return X2
|
||||
RET
|
||||
|
||||
aes33to64:
|
||||
// make 3 more starting seeds
|
||||
MOVO X1, X2
|
||||
MOVO X1, X3
|
||||
PXOR ·aeskeysched+16(SB), X1
|
||||
PXOR ·aeskeysched+32(SB), X2
|
||||
PXOR ·aeskeysched+48(SB), X3
|
||||
AESENC X1, X1
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
|
||||
// load first 32 bytes and last 32 bytes (the loads may overlap)
MOVOU (AX), X4
|
||||
MOVOU 16(AX), X5
|
||||
MOVOU -32(AX)(CX*1), X6
|
||||
MOVOU -16(AX)(CX*1), X7
|
||||
|
||||
// xor each lane with its own seed
PXOR X0, X4
|
||||
PXOR X1, X5
|
||||
PXOR X2, X6
|
||||
PXOR X3, X7
|
||||
|
||||
// scramble 3 times
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
// combine results
PXOR X6, X4
|
||||
PXOR X7, X5
|
||||
PXOR X5, X4
|
||||
MOVQ X4, AX // return X4
|
||||
RET
|
||||
|
||||
aes65to128:
|
||||
// make 7 more starting seeds
|
||||
MOVO X1, X2
|
||||
MOVO X1, X3
|
||||
MOVO X1, X4
|
||||
MOVO X1, X5
|
||||
MOVO X1, X6
|
||||
MOVO X1, X7
|
||||
PXOR ·aeskeysched+16(SB), X1
|
||||
PXOR ·aeskeysched+32(SB), X2
|
||||
PXOR ·aeskeysched+48(SB), X3
|
||||
PXOR ·aeskeysched+64(SB), X4
|
||||
PXOR ·aeskeysched+80(SB), X5
|
||||
PXOR ·aeskeysched+96(SB), X6
|
||||
PXOR ·aeskeysched+112(SB), X7
|
||||
AESENC X1, X1
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
// load data
|
||||
// (first 64 bytes and last 64 bytes; the loads may overlap)
MOVOU (AX), X8
|
||||
MOVOU 16(AX), X9
|
||||
MOVOU 32(AX), X10
|
||||
MOVOU 48(AX), X11
|
||||
MOVOU -64(AX)(CX*1), X12
|
||||
MOVOU -48(AX)(CX*1), X13
|
||||
MOVOU -32(AX)(CX*1), X14
|
||||
MOVOU -16(AX)(CX*1), X15
|
||||
|
||||
// xor with seed
|
||||
PXOR X0, X8
|
||||
PXOR X1, X9
|
||||
PXOR X2, X10
|
||||
PXOR X3, X11
|
||||
PXOR X4, X12
|
||||
PXOR X5, X13
|
||||
PXOR X6, X14
|
||||
PXOR X7, X15
|
||||
|
||||
// scramble 3 times
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
|
||||
// combine results
|
||||
PXOR X12, X8
|
||||
PXOR X13, X9
|
||||
PXOR X14, X10
|
||||
PXOR X15, X11
|
||||
PXOR X10, X8
|
||||
PXOR X11, X9
|
||||
PXOR X9, X8
|
||||
// X15 must be zero on return
|
||||
// (it was used as scratch above; Go's internal ABI on amd64 reserves X15 as a zero register)
PXOR X15, X15
|
||||
MOVQ X8, AX // return X8
|
||||
RET
|
||||
|
||||
aes129plus:
|
||||
// make 7 more starting seeds
|
||||
MOVO X1, X2
|
||||
MOVO X1, X3
|
||||
MOVO X1, X4
|
||||
MOVO X1, X5
|
||||
MOVO X1, X6
|
||||
MOVO X1, X7
|
||||
PXOR ·aeskeysched+16(SB), X1
|
||||
PXOR ·aeskeysched+32(SB), X2
|
||||
PXOR ·aeskeysched+48(SB), X3
|
||||
PXOR ·aeskeysched+64(SB), X4
|
||||
PXOR ·aeskeysched+80(SB), X5
|
||||
PXOR ·aeskeysched+96(SB), X6
|
||||
PXOR ·aeskeysched+112(SB), X7
|
||||
AESENC X1, X1
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
// start with last (possibly overlapping) block
|
||||
MOVOU -128(AX)(CX*1), X8
|
||||
MOVOU -112(AX)(CX*1), X9
|
||||
MOVOU -96(AX)(CX*1), X10
|
||||
MOVOU -80(AX)(CX*1), X11
|
||||
MOVOU -64(AX)(CX*1), X12
|
||||
MOVOU -48(AX)(CX*1), X13
|
||||
MOVOU -32(AX)(CX*1), X14
|
||||
MOVOU -16(AX)(CX*1), X15
|
||||
|
||||
// xor in seed
|
||||
PXOR X0, X8
|
||||
PXOR X1, X9
|
||||
PXOR X2, X10
|
||||
PXOR X3, X11
|
||||
PXOR X4, X12
|
||||
PXOR X5, X13
|
||||
PXOR X6, X14
|
||||
PXOR X7, X15
|
||||
|
||||
// compute number of remaining 128-byte blocks
|
||||
// CX = (length-1) >> 7; length > 128 here, so CX >= 1
DECQ CX
|
||||
SHRQ $7, CX
|
||||
|
||||
PCALIGN $16
|
||||
aesloop:
|
||||
// scramble state
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
|
||||
// scramble state, xor in a block
|
||||
MOVOU (AX), X0
|
||||
MOVOU 16(AX), X1
|
||||
MOVOU 32(AX), X2
|
||||
MOVOU 48(AX), X3
|
||||
AESENC X0, X8
|
||||
AESENC X1, X9
|
||||
AESENC X2, X10
|
||||
AESENC X3, X11
|
||||
MOVOU 64(AX), X4
|
||||
MOVOU 80(AX), X5
|
||||
MOVOU 96(AX), X6
|
||||
MOVOU 112(AX), X7
|
||||
AESENC X4, X12
|
||||
AESENC X5, X13
|
||||
AESENC X6, X14
|
||||
AESENC X7, X15
|
||||
|
||||
ADDQ $128, AX
|
||||
DECQ CX
|
||||
JNE aesloop
|
||||
|
||||
// 3 more scrambles to finish
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
|
||||
// combine results
PXOR X12, X8
|
||||
PXOR X13, X9
|
||||
PXOR X14, X10
|
||||
PXOR X15, X11
|
||||
PXOR X10, X8
|
||||
PXOR X11, X9
|
||||
PXOR X9, X8
|
||||
// X15 must be zero on return
|
||||
// (it was used as scratch above; Go's internal ABI on amd64 reserves X15 as a zero register)
PXOR X15, X15
|
||||
MOVQ X8, AX // return X8
|
||||
RET
|
||||
|
||||
// simple mask to get rid of data in the high part of the register.
|
||||
DATA masks<>+0x00(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x08(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
|
||||
DATA masks<>+0x18(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
|
||||
DATA masks<>+0x28(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
|
||||
DATA masks<>+0x38(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
|
||||
DATA masks<>+0x48(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
|
||||
DATA masks<>+0x58(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
|
||||
DATA masks<>+0x68(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
|
||||
DATA masks<>+0x78(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0x88(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
|
||||
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
|
||||
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
|
||||
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
|
||||
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
|
||||
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
|
||||
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
|
||||
GLOBL masks<>(SB),RODATA,$256
|
||||
|
||||
// these are arguments to pshufb. They move data down from
|
||||
// the high bytes of the register to the low bytes of the register.
|
||||
// index is how many bytes to move.
|
||||
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
|
||||
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
|
||||
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
|
||||
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
|
||||
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
|
||||
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
|
||||
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
|
||||
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
|
||||
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
|
||||
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
|
||||
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
|
||||
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
|
||||
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
|
||||
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
|
||||
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
|
||||
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
|
||||
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
|
||||
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
|
||||
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
|
||||
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
|
||||
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
|
||||
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
|
||||
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
|
||||
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
|
||||
GLOBL shifts<>(SB),RODATA,$256
|
||||
|
||||
// func checkMasksAndShiftsAlignment() bool
// Reports whether the addresses of both lookup tables are multiples of 16.
// The result is returned in AX.
TEXT ·checkMasksAndShiftsAlignment<ABIInternal>(SB),NOSPLIT,$0-1
|
||||
// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
|
||||
MOVQ $masks<>(SB), AX
|
||||
MOVQ $shifts<>(SB), BX
|
||||
ORQ BX, AX // a low bit set in either address survives the OR
|
||||
TESTQ $15, AX
|
||||
SETEQ AX // true when the low 4 bits of both addresses are zero
|
||||
RET
|
||||
390
src/internal/runtime/maps/memhash_arm64.s
Normal file
390
src/internal/runtime/maps/memhash_arm64.s
Normal file
|
|
@ -0,0 +1,390 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func MemHash32(p unsafe.Pointer, h uintptr) uintptr
|
||||
// Hashes the 32-bit value at p (R0) with seed h (R1) and returns the low
// 64 bits of the scrambled state in R0. When ·UseAeshash is zero it
// branches to ·memHash32Fallback with R0 and R1 untouched.
TEXT ·MemHash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
MOVB ·UseAeshash(SB), R10
|
||||
CBZ R10, noaes
|
||||
MOVD $·aeskeysched+0(SB), R3
|
||||
|
||||
VEOR V0.B16, V0.B16, V0.B16 // clear the state register
|
||||
VLD1 (R3), [V2.B16] // V2 = first 16 bytes of ·aeskeysched
|
||||
VLD1 (R0), V0.S[2] // data into 32-bit lane 2
|
||||
VMOV R1, V0.D[0] // seed into the low 64 bits
|
||||
|
||||
// three AESE steps keyed with V2, with AESMC between them
AESE V2.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V2.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V2.B16, V0.B16
|
||||
|
||||
VMOV V0.D[0], R0 // return the low 64 bits
|
||||
RET
|
||||
noaes:
|
||||
B ·memHash32Fallback<ABIInternal>(SB)
|
||||
|
||||
// func MemHash64(p unsafe.Pointer, h uintptr) uintptr
//
// MemHash64 hashes the 8 bytes at p (read through R0) with the seed h (R1)
// and leaves the result in R0. When UseAeshash is zero it tail-calls
// memHash64Fallback with the argument registers untouched.
|
||||
TEXT ·MemHash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
MOVB ·UseAeshash(SB), R10
|
||||
CBZ R10, noaes
|
||||
MOVD $·aeskeysched+0(SB), R3
|
||||
|
||||
// Build the input block in V0: zero it, then place the seed in the low
// 64 bits and the 8 data bytes in the high 64 bits.
VEOR V0.B16, V0.B16, V0.B16
|
||||
// V2 = first 16 bytes of aeskeysched.
VLD1 (R3), [V2.B16]
|
||||
VLD1 (R0), V0.D[1]
|
||||
VMOV R1, V0.D[0]
|
||||
|
||||
// Three AESE steps against V2, with AESMC after the first two.
AESE V2.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V2.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V2.B16, V0.B16
|
||||
|
||||
// Return the low 64 bits of the scrambled block.
VMOV V0.D[0], R0
|
||||
RET
|
||||
noaes:
|
||||
B ·memHash64Fallback<ABIInternal>(SB)
|
||||
|
||||
// func MemHash(p unsafe.Pointer, h, size uintptr) uintptr
//
// MemHash branches to aeshashbody when UseAeshash is non-zero and to
// memHashFallback otherwise. No register is moved before either branch, so
// the callee sees the arguments exactly as they were passed in.
|
||||
TEXT ·MemHash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
|
||||
MOVB ·UseAeshash(SB), R10
|
||||
CBZ R10, noaes
|
||||
B ·aeshashbody<>(SB)
|
||||
noaes:
|
||||
B ·memHashFallback<ABIInternal>(SB)
|
||||
|
||||
// func StrHash(p unsafe.Pointer, h uintptr) uintptr
//
// StrHash hashes the string whose header is at p. On the AES path it loads
// the two header words into R0 and R2 and branches to aeshashbody; otherwise
// it tail-calls strHashFallback with the original arguments.
|
||||
TEXT ·StrHash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
MOVB ·UseAeshash(SB), R10
|
||||
CBZ R10, noaes
|
||||
// Replace the header pointer in R0 with the data pointer; R1 (seed) is untouched.
LDP (R0), (R0, R2) // string data / length
|
||||
B ·aeshashbody<>(SB)
|
||||
noaes:
|
||||
B ·strHashFallback<ABIInternal>(SB)
|
||||
|
||||
// aeshashbody is the shared AES hashing routine branched to by MemHash and
// StrHash. It builds a seed block, then dispatches on the length in R2.
//
// R0: data
|
||||
// R1: seed data
|
||||
// R2: length
|
||||
// At return, R0 = return value
|
||||
TEXT ·aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
|
||||
// V30 = {seed, length}.
VEOR V30.B16, V30.B16, V30.B16
|
||||
VMOV R1, V30.D[0]
|
||||
VMOV R2, V30.D[1] // load length into seed
|
||||
|
||||
// V0 = first 16 bytes of aeskeysched, scrambled once with V30.
// The post-increment leaves R4 pointing at the next 16 key bytes.
MOVD $·aeskeysched+0(SB), R4
|
||||
VLD1.P 16(R4), [V0.B16]
|
||||
AESE V30.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
// Dispatch on length: <16, ==16, 17..32, 33..64, 65..128, >128.
CMP $16, R2
|
||||
BLO aes0to15
|
||||
BEQ aes16
|
||||
CMP $32, R2
|
||||
BLS aes17to32
|
||||
CMP $64, R2
|
||||
BLS aes33to64
|
||||
CMP $128, R2
|
||||
BLS aes65to128
|
||||
B aes129plus
|
||||
|
||||
// aes0to15 handles lengths 0..15. A zero length goes to aes0. Otherwise the
// data is gathered into a zeroed V2 piece by piece, one piece per set bit of
// the length (8, 4, 2, 1 bytes), each into its own lane so no piece overlaps.
aes0to15:
|
||||
CBZ R2, aes0
|
||||
VEOR V2.B16, V2.B16, V2.B16
|
||||
// Bit 3 set: take 8 bytes into the low 64-bit lane and advance R0.
TBZ $3, R2, less_than_8
|
||||
VLD1.P 8(R0), V2.D[0]
|
||||
|
||||
less_than_8:
|
||||
// Bit 2 set: take 4 bytes into 32-bit lane 2 and advance R0.
TBZ $2, R2, less_than_4
|
||||
VLD1.P 4(R0), V2.S[2]
|
||||
|
||||
less_than_4:
|
||||
// Bit 1 set: take 2 bytes into 16-bit lane 6 and advance R0.
TBZ $1, R2, less_than_2
|
||||
VLD1.P 2(R0), V2.H[6]
|
||||
|
||||
less_than_2:
|
||||
// Bit 0 set: take the final byte into byte lane 14.
TBZ $0, R2, done
|
||||
VLD1 (R0), V2.B[14]
|
||||
// done is also the target of aes16: scramble V2 three times against the
// seed in V0 and return the low 64 bits.
done:
|
||||
AESE V0.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V0.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V0.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
|
||||
VMOV V2.D[0], R0
|
||||
RET
|
||||
|
||||
// aes0 handles a zero length: return the low 64 bits of the scrambled seed
// held in V0 without reading any data.
aes0:
|
||||
VMOV V0.D[0], R0
|
||||
RET
|
||||
|
||||
// aes16 handles a length of exactly 16: load the whole input into V2 and
// reuse the scrambling sequence at done.
aes16:
|
||||
VLD1 (R0), [V2.B16]
|
||||
B done
|
||||
|
||||
// aes17to32 handles lengths 17..32 with two 16-byte lanes: V2 holds the
// first 16 bytes and V3 the last 16 bytes of the input.
aes17to32:
|
||||
// make second seed
|
||||
// V1 = next 16 bytes of aeskeysched (R4 was advanced at entry), scrambled with V30.
VLD1 (R4), [V1.B16]
|
||||
AESE V30.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
// R10 = length-16. The post-indexed load moves R0 forward by R10, so the
// second load reads the 16 bytes that end at the end of the input.
SUB $16, R2, R10
|
||||
VLD1.P (R0)(R10), [V2.B16]
|
||||
VLD1 (R0), [V3.B16]
|
||||
|
||||
AESE V0.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V1.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
|
||||
AESE V0.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V1.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
|
||||
// Final step on each lane has no AESMC.
AESE V0.B16, V2.B16
|
||||
AESE V1.B16, V3.B16
|
||||
|
||||
// Combine the two lanes and return the low 64 bits.
VEOR V3.B16, V2.B16, V2.B16
|
||||
|
||||
VMOV V2.D[0], R0
|
||||
RET
|
||||
|
||||
// aes33to64 handles lengths 33..64 with four 16-byte lanes: V4,V5 hold the
// first 32 bytes and V6,V7 the last 32 bytes of the input.
aes33to64:
|
||||
// V1..V3 = three more seeds from the next 48 bytes of aeskeysched,
// each scrambled with V30.
VLD1 (R4), [V1.B16, V2.B16, V3.B16]
|
||||
AESE V30.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V30.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V30.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
// R10 = length-32; after the post-indexed load R0 points at the last 32 bytes.
SUB $32, R2, R10
|
||||
|
||||
VLD1.P (R0)(R10), [V4.B16, V5.B16]
|
||||
VLD1 (R0), [V6.B16, V7.B16]
|
||||
|
||||
AESE V0.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V1.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V2.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V3.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
AESE V0.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V1.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V2.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V3.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
// Final step on each lane has no AESMC.
AESE V0.B16, V4.B16
|
||||
AESE V1.B16, V5.B16
|
||||
AESE V2.B16, V6.B16
|
||||
AESE V3.B16, V7.B16
|
||||
|
||||
// Fold the four lanes into V4 and return its low 64 bits.
VEOR V6.B16, V4.B16, V4.B16
|
||||
VEOR V7.B16, V5.B16, V5.B16
|
||||
VEOR V5.B16, V4.B16, V4.B16
|
||||
|
||||
VMOV V4.D[0], R0
|
||||
RET
|
||||
|
||||
// aes65to128 handles lengths 65..128 with eight 16-byte lanes: V8..V11 hold
// the first 64 bytes and V12..V15 the last 64 bytes of the input.
aes65to128:
|
||||
// V1..V7 = seven more seeds from the next 112 bytes of aeskeysched,
// each scrambled with V30.
VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
|
||||
VLD1 (R4), [V5.B16, V6.B16, V7.B16]
|
||||
AESE V30.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V30.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V30.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
AESE V30.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V30.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V30.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V30.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
// R10 = length-64; after the post-indexed load R0 points at the last 64 bytes.
SUB $64, R2, R10
|
||||
VLD1.P (R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
|
||||
VLD1 (R0), [V12.B16, V13.B16, V14.B16, V15.B16]
|
||||
// First scramble of each data lane against its seed.
AESE V0.B16, V8.B16
|
||||
AESMC V8.B16, V8.B16
|
||||
AESE V1.B16, V9.B16
|
||||
AESMC V9.B16, V9.B16
|
||||
AESE V2.B16, V10.B16
|
||||
AESMC V10.B16, V10.B16
|
||||
AESE V3.B16, V11.B16
|
||||
AESMC V11.B16, V11.B16
|
||||
AESE V4.B16, V12.B16
|
||||
AESMC V12.B16, V12.B16
|
||||
AESE V5.B16, V13.B16
|
||||
AESMC V13.B16, V13.B16
|
||||
AESE V6.B16, V14.B16
|
||||
AESMC V14.B16, V14.B16
|
||||
AESE V7.B16, V15.B16
|
||||
AESMC V15.B16, V15.B16
|
||||
|
||||
// Second scramble.
AESE V0.B16, V8.B16
|
||||
AESMC V8.B16, V8.B16
|
||||
AESE V1.B16, V9.B16
|
||||
AESMC V9.B16, V9.B16
|
||||
AESE V2.B16, V10.B16
|
||||
AESMC V10.B16, V10.B16
|
||||
AESE V3.B16, V11.B16
|
||||
AESMC V11.B16, V11.B16
|
||||
AESE V4.B16, V12.B16
|
||||
AESMC V12.B16, V12.B16
|
||||
AESE V5.B16, V13.B16
|
||||
AESMC V13.B16, V13.B16
|
||||
AESE V6.B16, V14.B16
|
||||
AESMC V14.B16, V14.B16
|
||||
AESE V7.B16, V15.B16
|
||||
AESMC V15.B16, V15.B16
|
||||
|
||||
// Final step on each lane has no AESMC.
AESE V0.B16, V8.B16
|
||||
AESE V1.B16, V9.B16
|
||||
AESE V2.B16, V10.B16
|
||||
AESE V3.B16, V11.B16
|
||||
AESE V4.B16, V12.B16
|
||||
AESE V5.B16, V13.B16
|
||||
AESE V6.B16, V14.B16
|
||||
AESE V7.B16, V15.B16
|
||||
|
||||
// Fold the eight lanes into V8 and return its low 64 bits.
VEOR V12.B16, V8.B16, V8.B16
|
||||
VEOR V13.B16, V9.B16, V9.B16
|
||||
VEOR V14.B16, V10.B16, V10.B16
|
||||
VEOR V15.B16, V11.B16, V11.B16
|
||||
VEOR V10.B16, V8.B16, V8.B16
|
||||
VEOR V11.B16, V9.B16, V9.B16
|
||||
VEOR V9.B16, V8.B16, V8.B16
|
||||
|
||||
VMOV V8.D[0], R0
|
||||
RET
|
||||
|
||||
// aes129plus handles lengths above 128. The eight seeds V0..V7 are the
// running state. V8..V15 hold the current 128-byte block, which starts as
// the last 128 bytes of the input and is then refilled from the front.
aes129plus:
|
||||
PRFM (R0), PLDL1KEEP
|
||||
// V1..V7 = seven more seeds from the next 112 bytes of aeskeysched,
// each scrambled with V30.
VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
|
||||
VLD1 (R4), [V5.B16, V6.B16, V7.B16]
|
||||
AESE V30.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V30.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V30.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
AESE V30.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V30.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V30.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V30.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
// R10 = data + length - 128: load the last 128 bytes into V8..V15.
ADD R0, R2, R10
|
||||
SUB $128, R10, R10
|
||||
VLD1.P 64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
|
||||
VLD1 (R10), [V12.B16, V13.B16, V14.B16, V15.B16]
|
||||
// R2 = (length-1)/128: the number of aesloop iterations, each of which
// reads 128 bytes from the front of the input through R0.
SUB $1, R2, R2
|
||||
LSR $7, R2, R2
|
||||
|
||||
aesloop:
|
||||
// Scramble the state with the block currently held in V8..V15.
AESE V8.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V9.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V10.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V11.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
AESE V12.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V13.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V14.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
// Load the next 64 bytes and scramble the first half of the state with them.
VLD1.P 64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
|
||||
AESE V8.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V9.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V10.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V11.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
|
||||
// Load the following 64 bytes and scramble the second half of the state.
VLD1.P 64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
|
||||
AESE V12.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V13.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V14.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
SUB $1, R2, R2
|
||||
CBNZ R2, aesloop
|
||||
|
||||
// Three finishing scrambles against the last block loaded; the first two
// are followed by AESMC, the third is not.
AESE V8.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V9.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V10.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V11.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
AESE V12.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V13.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V14.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
AESE V8.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V9.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V10.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V11.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
AESE V12.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V13.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V14.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
AESE V8.B16, V0.B16
|
||||
AESE V9.B16, V1.B16
|
||||
AESE V10.B16, V2.B16
|
||||
AESE V11.B16, V3.B16
|
||||
AESE V12.B16, V4.B16
|
||||
AESE V13.B16, V5.B16
|
||||
AESE V14.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
|
||||
// Fold the eight state lanes into V0 and return its low 64 bits.
VEOR V0.B16, V1.B16, V0.B16
|
||||
VEOR V2.B16, V3.B16, V2.B16
|
||||
VEOR V4.B16, V5.B16, V4.B16
|
||||
VEOR V6.B16, V7.B16, V6.B16
|
||||
VEOR V0.B16, V2.B16, V0.B16
|
||||
VEOR V4.B16, V6.B16, V4.B16
|
||||
VEOR V4.B16, V0.B16, V0.B16
|
||||
|
||||
VMOV V0.D[0], R0
|
||||
RET
|
||||
28
src/internal/runtime/maps/memhash_noaes.go
Normal file
28
src/internal/runtime/maps/memhash_noaes.go
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !(amd64 || arm64 || 386)
|
||||
|
||||
package maps
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// AES hashing is not implemented for these architectures, so MemHash
// forwards p, h and s unchanged to memHashFallback and returns its result.
|
||||
func MemHash(p unsafe.Pointer, h, s uintptr) uintptr {
|
||||
return memHashFallback(p, h, s)
|
||||
}
|
||||
|
||||
// MemHash32 forwards p and h unchanged to memHash32Fallback and returns
// its result.
func MemHash32(p unsafe.Pointer, h uintptr) uintptr {
|
||||
return memHash32Fallback(p, h)
|
||||
}
|
||||
|
||||
// MemHash64 forwards p and h unchanged to memHash64Fallback and returns
// its result.
func MemHash64(p unsafe.Pointer, h uintptr) uintptr {
|
||||
return memHash64Fallback(p, h)
|
||||
}
|
||||
|
||||
// StrHash forwards p and h unchanged to strHashFallback and returns its
// result.
func StrHash(p unsafe.Pointer, h uintptr) uintptr {
|
||||
return strHashFallback(p, h)
|
||||
}
|
||||
|
|
@ -16,21 +16,12 @@ import (
|
|||
|
||||
// Functions below pushed from runtime.
|
||||
//
|
||||
//go:noescape
|
||||
//go:linkname memhash32 runtime.memhash32
|
||||
func memhash32(p unsafe.Pointer, h uintptr) uintptr
|
||||
|
||||
//go:noescape
|
||||
//go:linkname memhash64 runtime.memhash64
|
||||
func memhash64(p unsafe.Pointer, h uintptr) uintptr
|
||||
|
||||
//go:noescape
|
||||
//go:linkname strhash runtime.strhash
|
||||
func strhash(a unsafe.Pointer, h uintptr) uintptr
|
||||
|
||||
//go:linkname fatal
|
||||
func fatal(s string)
|
||||
|
||||
//go:linkname bootstrapRand runtime.bootstrapRand
|
||||
func bootstrapRand() uint64
|
||||
|
||||
//go:linkname rand
|
||||
func rand() uint64
|
||||
|
||||
|
|
|
|||
91
src/internal/runtime/maps/runtime_alg.go
Normal file
91
src/internal/runtime/maps/runtime_alg.go
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package maps
|
||||
|
||||
import (
|
||||
"internal/byteorder"
|
||||
"internal/cpu"
|
||||
"internal/goarch"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// UseAeshash is a runtime variable recording whether the processor we're
|
||||
// running on actually supports the instructions used by the AES-based
|
||||
// hash implementation. It is set to true by initAlgAES.
|
||||
var UseAeshash bool
|
||||
|
||||
// hashRandomBytes is the size in bytes of aeskeysched
// (for example 128 when goarch.PtrSize is 8).
const hashRandomBytes = goarch.PtrSize / 4 * 64
|
||||
|
||||
// aeskeysched is used to seed the hash function.
// initAlgAES fills it with values from bootstrapRand.
|
||||
var aeskeysched [hashRandomBytes]byte
|
||||
|
||||
// hashkey is used in hash{32,64}.go to seed the hash function.
// AlgInit fills it from bootstrapRand when the AES hash is not selected.
|
||||
var hashkey [4]uintptr
|
||||
|
||||
// AlgInit selects and seeds the hash implementation.
//
// On 386/amd64 with AES, SSSE3 and SSE4.1, and on arm64 with AES, it calls
// initAlgAES and returns. Otherwise it fills hashkey with values from
// bootstrapRand. On the x86 path it first calls fatal if
// checkMasksAndShiftsAlignment reports false.
func AlgInit() {
|
||||
// Install AES hash algorithms if the instructions needed are present.
|
||||
if (goarch.GOARCH == "386" || goarch.GOARCH == "amd64") &&
|
||||
cpu.X86.HasAES && // AESENC
|
||||
cpu.X86.HasSSSE3 && // PSHUFB
|
||||
cpu.X86.HasSSE41 { // PINSR{D,Q}
|
||||
|
||||
// In aeshashbody (that is used by memhash & strhash)
|
||||
// we have global variables that should be properly aligned.
|
||||
//
|
||||
// See #12415
|
||||
if !checkMasksAndShiftsAlignment() {
|
||||
fatal("maps: global variables for AES hashing are not properly aligned!")
|
||||
}
|
||||
initAlgAES()
|
||||
return
|
||||
}
|
||||
if goarch.GOARCH == "arm64" && cpu.ARM64.HasAES {
|
||||
initAlgAES()
|
||||
return
|
||||
}
|
||||
// No AES hash selected: seed the keys read by the fallback hash functions.
for i := range hashkey {
|
||||
hashkey[i] = uintptr(bootstrapRand())
|
||||
}
|
||||
}
|
||||
|
||||
// initAlgAES enables the AES-based hash by setting UseAeshash and fills
// aeskeysched with values from bootstrapRand.
func initAlgAES() {
|
||||
UseAeshash = true
|
||||
// Initialize with random data so hash collisions will be hard to engineer.
|
||||
// View aeskeysched as an array of uint64 so it can be filled 8 bytes at a time.
key := (*[hashRandomBytes / 8]uint64)(unsafe.Pointer(&aeskeysched))
|
||||
for i := range key {
|
||||
key[i] = bootstrapRand()
|
||||
}
|
||||
}
|
||||
|
||||
// strHashFallback hashes the string whose header is at a with seed h.
// It reads the header as a pointer followed by an int length and passes
// both to memHashFallback.
func strHashFallback(a unsafe.Pointer, h uintptr) uintptr {
|
||||
// stringStruct is the (pointer, length) layout that a is read through.
type stringStruct struct {
|
||||
str unsafe.Pointer
|
||||
len int
|
||||
}
|
||||
x := (*stringStruct)(a)
|
||||
return memHashFallback(x.str, h, uintptr(x.len))
|
||||
}
|
||||
|
||||
// add returns the pointer p advanced by x bytes.
//
//go:nosplit
|
||||
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
|
||||
return unsafe.Pointer(uintptr(p) + x)
|
||||
}
|
||||
|
||||
// Note: These routines perform the read with native endianness.
//
// readUnaligned32 reads the 4 bytes at p as a uint32, decoding them as
// big-endian when goarch.BigEndian is set and as little-endian otherwise.
|
||||
func readUnaligned32(p unsafe.Pointer) uint32 {
|
||||
q := (*[4]byte)(p)
|
||||
if goarch.BigEndian {
|
||||
return byteorder.BEUint32(q[:])
|
||||
}
|
||||
return byteorder.LEUint32(q[:])
|
||||
}
|
||||
|
||||
// readUnaligned64 reads the 8 bytes at p as a uint64, decoding them as
// big-endian when goarch.BigEndian is set and as little-endian otherwise.
func readUnaligned64(p unsafe.Pointer) uint64 {
|
||||
q := (*[8]byte)(p)
|
||||
if goarch.BigEndian {
|
||||
return byteorder.BEUint64(q[:])
|
||||
}
|
||||
return byteorder.LEUint64(q[:])
|
||||
}
|
||||
|
|
@ -75,7 +75,7 @@ func runtime_mapaccess2_fast32(typ *abi.MapType, m *Map, key uint32) (unsafe.Poi
|
|||
// However, from compiler's perspective, key is no longer address-taken and
|
||||
// filled back in register before the loop.
|
||||
k := key
|
||||
hash := memhash32(unsafe.Pointer(&k), m.seed)
|
||||
hash := MemHash32(unsafe.Pointer(&k), m.seed)
|
||||
|
||||
// Select table.
|
||||
idx := m.directoryIndex(hash)
|
||||
|
|
@ -169,7 +169,7 @@ func runtime_mapassign_fast32(typ *abi.MapType, m *Map, key uint32) unsafe.Point
|
|||
// See the related comment in runtime_mapaccess2_fast32
|
||||
// for why we pass local copy of key.
|
||||
k := key
|
||||
hash := memhash32(unsafe.Pointer(&k), m.seed)
|
||||
hash := MemHash32(unsafe.Pointer(&k), m.seed)
|
||||
|
||||
// Set writing after calling Hasher, since Hasher may panic, in which
|
||||
// case we have not actually done a write.
|
||||
|
|
@ -311,7 +311,7 @@ func runtime_mapassign_fast32ptr(typ *abi.MapType, m *Map, key unsafe.Pointer) u
|
|||
// See the related comment in runtime_mapaccess2_fast32
|
||||
// for why we pass local copy of key.
|
||||
k := key
|
||||
hash := memhash32(unsafe.Pointer(&k), m.seed)
|
||||
hash := MemHash32(unsafe.Pointer(&k), m.seed)
|
||||
|
||||
// Set writing after calling Hasher, since Hasher may panic, in which
|
||||
// case we have not actually done a write.
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ func runtime_mapaccess2_fast64(typ *abi.MapType, m *Map, key uint64) (unsafe.Poi
|
|||
// See the related comment in runtime_mapaccess2_fast32
|
||||
// for why we pass local copy of key.
|
||||
k := key
|
||||
hash := memhash64(unsafe.Pointer(&k), m.seed)
|
||||
hash := MemHash64(unsafe.Pointer(&k), m.seed)
|
||||
|
||||
// Select table.
|
||||
idx := m.directoryIndex(hash)
|
||||
|
|
@ -161,7 +161,7 @@ func runtime_mapassign_fast64(typ *abi.MapType, m *Map, key uint64) unsafe.Point
|
|||
// See the related comment in runtime_mapaccess2_fast32
|
||||
// for why we pass local copy of key.
|
||||
k := key
|
||||
hash := memhash64(unsafe.Pointer(&k), m.seed)
|
||||
hash := MemHash64(unsafe.Pointer(&k), m.seed)
|
||||
|
||||
// Set writing after calling Hasher, since Hasher may panic, in which
|
||||
// case we have not actually done a write.
|
||||
|
|
@ -341,7 +341,7 @@ func runtime_mapassign_fast64ptr(typ *abi.MapType, m *Map, key unsafe.Pointer) u
|
|||
// See the related comment in runtime_mapaccess2_fast32
|
||||
// for why we pass local copy of key.
|
||||
k := key
|
||||
hash := memhash64(unsafe.Pointer(&k), m.seed)
|
||||
hash := MemHash64(unsafe.Pointer(&k), m.seed)
|
||||
|
||||
// Set writing after calling Hasher, since Hasher may panic, in which
|
||||
// case we have not actually done a write.
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ dohash:
|
|||
// See the related comment in runtime_mapaccess2_fast32
|
||||
// for why we pass local copy of key.
|
||||
k := key
|
||||
hash := strhash(unsafe.Pointer(&k), m.seed)
|
||||
hash := StrHash(unsafe.Pointer(&k), m.seed)
|
||||
h2 := uint8(h2(hash))
|
||||
ctrls = *g.ctrls()
|
||||
slotKey = g.key(typ, 0)
|
||||
|
|
@ -149,7 +149,7 @@ func runtime_mapaccess2_faststr(typ *abi.MapType, m *Map, key string) (unsafe.Po
|
|||
// See the related comment in runtime_mapaccess2_fast32
|
||||
// for why we pass local copy of key.
|
||||
k := key
|
||||
hash := strhash(unsafe.Pointer(&k), m.seed)
|
||||
hash := StrHash(unsafe.Pointer(&k), m.seed)
|
||||
|
||||
// Select table.
|
||||
idx := m.directoryIndex(hash)
|
||||
|
|
@ -245,7 +245,7 @@ func runtime_mapassign_faststr(typ *abi.MapType, m *Map, key string) unsafe.Poin
|
|||
// See the related comment in runtime_mapaccess2_fast32
|
||||
// for why we pass local copy of key.
|
||||
k := key
|
||||
hash := strhash(unsafe.Pointer(&k), m.seed)
|
||||
hash := StrHash(unsafe.Pointer(&k), m.seed)
|
||||
|
||||
// Set writing after calling Hasher, since Hasher may panic, in which
|
||||
// case we have not actually done a write.
|
||||
|
|
|
|||
|
|
@ -7,11 +7,11 @@
|
|||
|
||||
//go:build 386 || arm || mips || mipsle || wasm || (gccgo && (ppc || s390))
|
||||
|
||||
package runtime
|
||||
package maps
|
||||
|
||||
import "unsafe"
|
||||
|
||||
func memhash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
|
||||
func memHash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
|
||||
a, b := mix32(uint32(seed), uint32(4^hashkey[0]))
|
||||
t := readUnaligned32(p)
|
||||
a ^= t
|
||||
|
|
@ -21,7 +21,7 @@ func memhash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
|
|||
return uintptr(a ^ b)
|
||||
}
|
||||
|
||||
func memhash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
|
||||
func memHash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
|
||||
a, b := mix32(uint32(seed), uint32(8^hashkey[0]))
|
||||
a ^= readUnaligned32(p)
|
||||
b ^= readUnaligned32(add(p, 4))
|
||||
|
|
@ -30,7 +30,7 @@ func memhash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
|
|||
return uintptr(a ^ b)
|
||||
}
|
||||
|
||||
func memhashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
|
||||
func memHashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
|
||||
|
||||
a, b := mix32(uint32(seed), uint32(s^hashkey[0]))
|
||||
if s == 0 {
|
||||
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
//go:build amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x
|
||||
|
||||
package runtime
|
||||
package maps
|
||||
|
||||
import (
|
||||
"math/bits"
|
||||
|
|
@ -18,7 +18,7 @@ const (
|
|||
m5 = 0x1d8e4e27c47d124f
|
||||
)
|
||||
|
||||
func memhashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
|
||||
func memHashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
|
||||
var a, b uintptr
|
||||
seed ^= hashkey[0]
|
||||
switch {
|
||||
|
|
@ -64,12 +64,12 @@ func memhashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
|
|||
return mix(m5^s, mix(a^hashkey[1], b^seed))
|
||||
}
|
||||
|
||||
func memhash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
|
||||
func memHash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
|
||||
a := r4(p)
|
||||
return mix(m5^4, mix(a^hashkey[1], a^seed^hashkey[0]))
|
||||
}
|
||||
|
||||
func memhash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
|
||||
func memHash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
|
||||
a := r8(p)
|
||||
return mix(m5^8, mix(a^hashkey[1], a^seed^hashkey[0]))
|
||||
}
|
||||
|
|
@ -7,8 +7,8 @@ package runtime
|
|||
import (
|
||||
"internal/abi"
|
||||
"internal/byteorder"
|
||||
"internal/cpu"
|
||||
"internal/goarch"
|
||||
"internal/runtime/maps"
|
||||
"internal/runtime/sys"
|
||||
"unsafe"
|
||||
)
|
||||
|
|
@ -54,12 +54,11 @@ func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr {
|
|||
return memhash(p, h, size)
|
||||
}
|
||||
|
||||
// runtime variable to check if the processor we're running on
|
||||
// actually supports the instructions used by the AES-based
|
||||
// hash implementation.
|
||||
var useAeshash bool
|
||||
|
||||
// in asm_*.s
|
||||
// These are simple wrappers.
|
||||
// It's better to use maps.MemHash functions directly,
|
||||
// but we have reflection code that still calls hashing from runtime via LookupRuntime,
|
||||
// so we have to try to minimize overhead of an extra call.
|
||||
// For this reason the wrappers add nosplit for performance.
|
||||
|
||||
// memhash should be an internal detail,
|
||||
// but widely used packages access it using linkname.
|
||||
|
|
@ -77,16 +76,21 @@ var useAeshash bool
|
|||
// Do not remove or change the type signature.
|
||||
// See go.dev/issue/67401.
|
||||
//
|
||||
//go:nosplit
|
||||
//go:linkname memhash
|
||||
func memhash(p unsafe.Pointer, h, s uintptr) uintptr
|
||||
func memhash(p unsafe.Pointer, h, s uintptr) uintptr {
|
||||
return maps.MemHash(p, h, s)
|
||||
}
|
||||
|
||||
// Accessed in internal/runtime/maps.
|
||||
//
|
||||
//go:linknamestd memhash32
|
||||
func memhash32(p unsafe.Pointer, h uintptr) uintptr
|
||||
//go:nosplit
|
||||
func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
|
||||
return maps.MemHash64(p, seed)
|
||||
}
|
||||
|
||||
//go:linknamestd memhash64
|
||||
func memhash64(p unsafe.Pointer, h uintptr) uintptr
|
||||
//go:nosplit
|
||||
func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
|
||||
return maps.MemHash32(p, seed)
|
||||
}
|
||||
|
||||
// strhash should be an internal detail,
|
||||
// but widely used packages access it using linkname.
|
||||
|
|
@ -101,11 +105,8 @@ func memhash64(p unsafe.Pointer, h uintptr) uintptr
|
|||
// See go.dev/issue/67401.
|
||||
//
|
||||
//go:linkname strhash
|
||||
func strhash(p unsafe.Pointer, h uintptr) uintptr
|
||||
|
||||
func strhashFallback(a unsafe.Pointer, h uintptr) uintptr {
|
||||
x := (*stringStruct)(a)
|
||||
return memhashFallback(x.str, h, uintptr(x.len))
|
||||
func strhash(p unsafe.Pointer, h uintptr) uintptr {
|
||||
return maps.StrHash(p, h)
|
||||
}
|
||||
|
||||
// NOTE: Because NaN != NaN, a map can contain any
|
||||
|
|
@ -383,50 +384,6 @@ func ifaceHash(i interface {
|
|||
return interhash(noescape(unsafe.Pointer(&i)), seed)
|
||||
}
|
||||
|
||||
const hashRandomBytes = goarch.PtrSize / 4 * 64
|
||||
|
||||
// used in asm_{386,amd64,arm64}.s to seed the hash function
|
||||
var aeskeysched [hashRandomBytes]byte
|
||||
|
||||
// used in hash{32,64}.go to seed the hash function
|
||||
var hashkey [4]uintptr
|
||||
|
||||
func alginit() {
|
||||
// Install AES hash algorithms if the instructions needed are present.
|
||||
if (GOARCH == "386" || GOARCH == "amd64") &&
|
||||
cpu.X86.HasAES && // AESENC
|
||||
cpu.X86.HasSSSE3 && // PSHUFB
|
||||
cpu.X86.HasSSE41 { // PINSR{D,Q}
|
||||
initAlgAES()
|
||||
return
|
||||
}
|
||||
if GOARCH == "arm64" && cpu.ARM64.HasAES {
|
||||
initAlgAES()
|
||||
return
|
||||
}
|
||||
for i := range hashkey {
|
||||
hashkey[i] = uintptr(bootstrapRand())
|
||||
}
|
||||
}
|
||||
|
||||
func initAlgAES() {
|
||||
useAeshash = true
|
||||
// Initialize with random data so hash collisions will be hard to engineer.
|
||||
key := (*[hashRandomBytes / 8]uint64)(unsafe.Pointer(&aeskeysched))
|
||||
for i := range key {
|
||||
key[i] = bootstrapRand()
|
||||
}
|
||||
}
|
||||
|
||||
// Note: These routines perform the read with a native endianness.
|
||||
func readUnaligned32(p unsafe.Pointer) uint32 {
|
||||
q := (*[4]byte)(p)
|
||||
if goarch.BigEndian {
|
||||
return byteorder.BEUint32(q[:])
|
||||
}
|
||||
return byteorder.LEUint32(q[:])
|
||||
}
|
||||
|
||||
func readUnaligned64(p unsafe.Pointer) uint64 {
|
||||
q := (*[8]byte)(p)
|
||||
if goarch.BigEndian {
|
||||
|
|
|
|||
|
|
@ -923,432 +923,6 @@ TEXT ldt0setup<>(SB),NOSPLIT,$16-0
|
|||
TEXT runtime·emptyfunc(SB),0,$0-0
|
||||
RET
|
||||
|
||||
// hash function using AES hardware instructions
|
||||
TEXT runtime·memhash(SB),NOSPLIT,$0-16
|
||||
CMPB runtime·useAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVL p+0(FP), AX // ptr to data
|
||||
MOVL s+8(FP), BX // size
|
||||
LEAL ret+12(FP), DX
|
||||
JMP runtime·aeshashbody<>(SB)
|
||||
noaes:
|
||||
JMP runtime·memhashFallback(SB)
|
||||
|
||||
TEXT runtime·strhash(SB),NOSPLIT,$0-12
|
||||
CMPB runtime·useAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVL p+0(FP), AX // ptr to string object
|
||||
MOVL 4(AX), BX // length of string
|
||||
MOVL (AX), AX // string data
|
||||
LEAL ret+8(FP), DX
|
||||
JMP runtime·aeshashbody<>(SB)
|
||||
noaes:
|
||||
JMP runtime·strhashFallback(SB)
|
||||
|
||||
// AX: data
|
||||
// BX: length
|
||||
// DX: address to put return value
|
||||
TEXT runtime·aeshashbody<>(SB),NOSPLIT,$0-0
|
||||
MOVL h+4(FP), X0 // 32 bits of per-table hash seed
|
||||
PINSRW $4, BX, X0 // 16 bits of length
|
||||
PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
|
||||
MOVO X0, X1 // save unscrambled seed
|
||||
PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
|
||||
AESENC X0, X0 // scramble seed
|
||||
|
||||
CMPL BX, $16
|
||||
JB aes0to15
|
||||
JE aes16
|
||||
CMPL BX, $32
|
||||
JBE aes17to32
|
||||
CMPL BX, $64
|
||||
JBE aes33to64
|
||||
JMP aes65plus
|
||||
|
||||
aes0to15:
|
||||
TESTL BX, BX
|
||||
JE aes0
|
||||
|
||||
ADDL $16, AX
|
||||
TESTW $0xff0, AX
|
||||
JE endofpage
|
||||
|
||||
// 16 bytes loaded at this address won't cross
|
||||
// a page boundary, so we can load it directly.
|
||||
MOVOU -16(AX), X1
|
||||
ADDL BX, BX
|
||||
PAND masks<>(SB)(BX*8), X1
|
||||
|
||||
final1:
|
||||
PXOR X0, X1 // xor data with seed
|
||||
AESENC X1, X1 // scramble combo 3 times
|
||||
AESENC X1, X1
|
||||
AESENC X1, X1
|
||||
MOVL X1, (DX)
|
||||
RET
|
||||
|
||||
endofpage:
|
||||
// address ends in 1111xxxx. Might be up against
|
||||
// a page boundary, so load ending at last byte.
|
||||
// Then shift bytes down using pshufb.
|
||||
MOVOU -32(AX)(BX*1), X1
|
||||
ADDL BX, BX
|
||||
PSHUFB shifts<>(SB)(BX*8), X1
|
||||
JMP final1
|
||||
|
||||
aes0:
|
||||
// Return scrambled input seed
|
||||
AESENC X0, X0
|
||||
MOVL X0, (DX)
|
||||
RET
|
||||
|
||||
aes16:
|
||||
MOVOU (AX), X1
|
||||
JMP final1
|
||||
|
||||
aes17to32:
|
||||
// make second starting seed
|
||||
PXOR runtime·aeskeysched+16(SB), X1
|
||||
AESENC X1, X1
|
||||
|
||||
// load data to be hashed
|
||||
MOVOU (AX), X2
|
||||
MOVOU -16(AX)(BX*1), X3
|
||||
|
||||
// xor with seed
|
||||
PXOR X0, X2
|
||||
PXOR X1, X3
|
||||
|
||||
// scramble 3 times
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
|
||||
// combine results
|
||||
PXOR X3, X2
|
||||
MOVL X2, (DX)
|
||||
RET
|
||||
|
||||
aes33to64:
|
||||
// make 3 more starting seeds
|
||||
MOVO X1, X2
|
||||
MOVO X1, X3
|
||||
PXOR runtime·aeskeysched+16(SB), X1
|
||||
PXOR runtime·aeskeysched+32(SB), X2
|
||||
PXOR runtime·aeskeysched+48(SB), X3
|
||||
AESENC X1, X1
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
|
||||
MOVOU (AX), X4
|
||||
MOVOU 16(AX), X5
|
||||
MOVOU -32(AX)(BX*1), X6
|
||||
MOVOU -16(AX)(BX*1), X7
|
||||
|
||||
PXOR X0, X4
|
||||
PXOR X1, X5
|
||||
PXOR X2, X6
|
||||
PXOR X3, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
PXOR X6, X4
|
||||
PXOR X7, X5
|
||||
PXOR X5, X4
|
||||
MOVL X4, (DX)
|
||||
RET
|
||||
|
||||
aes65plus:
|
||||
// make 3 more starting seeds
|
||||
MOVO X1, X2
|
||||
MOVO X1, X3
|
||||
PXOR runtime·aeskeysched+16(SB), X1
|
||||
PXOR runtime·aeskeysched+32(SB), X2
|
||||
PXOR runtime·aeskeysched+48(SB), X3
|
||||
AESENC X1, X1
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
|
||||
// start with last (possibly overlapping) block
|
||||
MOVOU -64(AX)(BX*1), X4
|
||||
MOVOU -48(AX)(BX*1), X5
|
||||
MOVOU -32(AX)(BX*1), X6
|
||||
MOVOU -16(AX)(BX*1), X7
|
||||
|
||||
// scramble state once
|
||||
AESENC X0, X4
|
||||
AESENC X1, X5
|
||||
AESENC X2, X6
|
||||
AESENC X3, X7
|
||||
|
||||
// compute number of remaining 64-byte blocks
|
||||
DECL BX
|
||||
SHRL $6, BX
|
||||
|
||||
aesloop:
|
||||
// scramble state, xor in a block
|
||||
MOVOU (AX), X0
|
||||
MOVOU 16(AX), X1
|
||||
MOVOU 32(AX), X2
|
||||
MOVOU 48(AX), X3
|
||||
AESENC X0, X4
|
||||
AESENC X1, X5
|
||||
AESENC X2, X6
|
||||
AESENC X3, X7
|
||||
|
||||
// scramble state
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
ADDL $64, AX
|
||||
DECL BX
|
||||
JNE aesloop
|
||||
|
||||
// 3 more scrambles to finish
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
PXOR X6, X4
|
||||
PXOR X7, X5
|
||||
PXOR X5, X4
|
||||
MOVL X4, (DX)
|
||||
RET
|
||||
|
||||
TEXT runtime·memhash32(SB),NOSPLIT,$0-12
|
||||
CMPB runtime·useAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVL p+0(FP), AX // ptr to data
|
||||
MOVL h+4(FP), X0 // seed
|
||||
PINSRD $1, (AX), X0 // data
|
||||
AESENC runtime·aeskeysched+0(SB), X0
|
||||
AESENC runtime·aeskeysched+16(SB), X0
|
||||
AESENC runtime·aeskeysched+32(SB), X0
|
||||
MOVL X0, ret+8(FP)
|
||||
RET
|
||||
noaes:
|
||||
JMP runtime·memhash32Fallback(SB)
|
||||
|
||||
TEXT runtime·memhash64(SB),NOSPLIT,$0-12
|
||||
CMPB runtime·useAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVL p+0(FP), AX // ptr to data
|
||||
MOVQ (AX), X0 // data
|
||||
PINSRD $2, h+4(FP), X0 // seed
|
||||
AESENC runtime·aeskeysched+0(SB), X0
|
||||
AESENC runtime·aeskeysched+16(SB), X0
|
||||
AESENC runtime·aeskeysched+32(SB), X0
|
||||
MOVL X0, ret+8(FP)
|
||||
RET
|
||||
noaes:
|
||||
JMP runtime·memhash64Fallback(SB)
|
||||
|
||||
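// masks<> is a table of sixteen 16-byte masks used to get rid of data in the
// high part of the register: the entry at offset 16*n keeps the low n bytes and clears the rest.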
|
||||
DATA masks<>+0x00(SB)/4, $0x00000000
|
||||
DATA masks<>+0x04(SB)/4, $0x00000000
|
||||
DATA masks<>+0x08(SB)/4, $0x00000000
|
||||
DATA masks<>+0x0c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x10(SB)/4, $0x000000ff
|
||||
DATA masks<>+0x14(SB)/4, $0x00000000
|
||||
DATA masks<>+0x18(SB)/4, $0x00000000
|
||||
DATA masks<>+0x1c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x20(SB)/4, $0x0000ffff
|
||||
DATA masks<>+0x24(SB)/4, $0x00000000
|
||||
DATA masks<>+0x28(SB)/4, $0x00000000
|
||||
DATA masks<>+0x2c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x30(SB)/4, $0x00ffffff
|
||||
DATA masks<>+0x34(SB)/4, $0x00000000
|
||||
DATA masks<>+0x38(SB)/4, $0x00000000
|
||||
DATA masks<>+0x3c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x40(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x44(SB)/4, $0x00000000
|
||||
DATA masks<>+0x48(SB)/4, $0x00000000
|
||||
DATA masks<>+0x4c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x50(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x54(SB)/4, $0x000000ff
|
||||
DATA masks<>+0x58(SB)/4, $0x00000000
|
||||
DATA masks<>+0x5c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x60(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x64(SB)/4, $0x0000ffff
|
||||
DATA masks<>+0x68(SB)/4, $0x00000000
|
||||
DATA masks<>+0x6c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x70(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x74(SB)/4, $0x00ffffff
|
||||
DATA masks<>+0x78(SB)/4, $0x00000000
|
||||
DATA masks<>+0x7c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x80(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x84(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x88(SB)/4, $0x00000000
|
||||
DATA masks<>+0x8c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0x90(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x94(SB)/4, $0xffffffff
|
||||
DATA masks<>+0x98(SB)/4, $0x000000ff
|
||||
DATA masks<>+0x9c(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0xa0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xa4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xa8(SB)/4, $0x0000ffff
|
||||
DATA masks<>+0xac(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0xb0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xb4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xb8(SB)/4, $0x00ffffff
|
||||
DATA masks<>+0xbc(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0xc0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xc4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xc8(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xcc(SB)/4, $0x00000000
|
||||
|
||||
DATA masks<>+0xd0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xd4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xd8(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xdc(SB)/4, $0x000000ff
|
||||
|
||||
DATA masks<>+0xe0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xe4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xe8(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xec(SB)/4, $0x0000ffff
|
||||
|
||||
DATA masks<>+0xf0(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xf4(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xf8(SB)/4, $0xffffffff
|
||||
DATA masks<>+0xfc(SB)/4, $0x00ffffff
|
||||
|
||||
GLOBL masks<>(SB),RODATA,$256
|
||||
|
||||
// these are arguments to pshufb. They move data down from
|
||||
// the high bytes of the register to the low bytes of the register.
|
||||
// The entry at offset 16*n moves the top n bytes down to the bottom; all other result bytes are zeroed.
|
||||
DATA shifts<>+0x00(SB)/4, $0x00000000
|
||||
DATA shifts<>+0x04(SB)/4, $0x00000000
|
||||
DATA shifts<>+0x08(SB)/4, $0x00000000
|
||||
DATA shifts<>+0x0c(SB)/4, $0x00000000
|
||||
|
||||
DATA shifts<>+0x10(SB)/4, $0xffffff0f
|
||||
DATA shifts<>+0x14(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x18(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x1c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x20(SB)/4, $0xffff0f0e
|
||||
DATA shifts<>+0x24(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x28(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x2c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
|
||||
DATA shifts<>+0x34(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x38(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x3c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
|
||||
DATA shifts<>+0x44(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x48(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x4c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
|
||||
DATA shifts<>+0x54(SB)/4, $0xffffff0f
|
||||
DATA shifts<>+0x58(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x5c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
|
||||
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
|
||||
DATA shifts<>+0x68(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x6c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
|
||||
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
|
||||
DATA shifts<>+0x78(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x7c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x80(SB)/4, $0x0b0a0908
|
||||
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
|
||||
DATA shifts<>+0x88(SB)/4, $0xffffffff
|
||||
DATA shifts<>+0x8c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0x90(SB)/4, $0x0a090807
|
||||
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
|
||||
DATA shifts<>+0x98(SB)/4, $0xffffff0f
|
||||
DATA shifts<>+0x9c(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0xa0(SB)/4, $0x09080706
|
||||
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
|
||||
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
|
||||
DATA shifts<>+0xac(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0xb0(SB)/4, $0x08070605
|
||||
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
|
||||
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
|
||||
DATA shifts<>+0xbc(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0xc0(SB)/4, $0x07060504
|
||||
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
|
||||
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
|
||||
DATA shifts<>+0xcc(SB)/4, $0xffffffff
|
||||
|
||||
DATA shifts<>+0xd0(SB)/4, $0x06050403
|
||||
DATA shifts<>+0xd4(SB)/4, $0x0a090807
|
||||
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
|
||||
DATA shifts<>+0xdc(SB)/4, $0xffffff0f
|
||||
|
||||
DATA shifts<>+0xe0(SB)/4, $0x05040302
|
||||
DATA shifts<>+0xe4(SB)/4, $0x09080706
|
||||
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
|
||||
DATA shifts<>+0xec(SB)/4, $0xffff0f0e
|
||||
|
||||
DATA shifts<>+0xf0(SB)/4, $0x04030201
|
||||
DATA shifts<>+0xf4(SB)/4, $0x08070605
|
||||
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
|
||||
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
|
||||
|
||||
GLOBL shifts<>(SB),RODATA,$256
|
||||
|
||||
// checkASM (386) stores, in its one-byte result, whether both masks<> and
// shifts<> start at 16-byte-aligned addresses.
TEXT ·checkASM(SB),NOSPLIT,$0-1
|
||||
// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
|
||||
MOVL $masks<>(SB), AX
|
||||
MOVL $shifts<>(SB), BX
|
||||
// OR the two addresses so a single test of the low 4 bits covers both
ORL BX, AX
|
||||
TESTL $15, AX
|
||||
// result is 1 when none of the low 4 bits is set
SETEQ ret+0(FP)
|
||||
RET
|
||||
|
||||
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
|
||||
// Must obey the gcc calling convention.
|
||||
TEXT _cgo_topofstack(SB),NOSPLIT,$0
|
||||
|
|
|
|||
|
|
@ -1243,483 +1243,6 @@ fences:
|
|||
RDTSC
|
||||
JMP done
|
||||
|
||||
// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
|
||||
// hash function using AES hardware instructions
|
||||
// Dispatches on runtime·useAeshash: non-zero tail-jumps to aeshashbody with the
// arguments still in AX/BX/CX, zero tail-jumps to runtime·memhashFallback.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
|
||||
// AX = ptr to data
|
||||
// BX = seed
|
||||
// CX = size
|
||||
CMPB runtime·useAeshash(SB), $0
|
||||
JEQ noaes
|
||||
JMP runtime·aeshashbody<>(SB)
|
||||
noaes:
|
||||
JMP runtime·memhashFallback<ABIInternal>(SB)
|
||||
|
||||
// func strhash(p unsafe.Pointer, h uintptr) uintptr
|
||||
// When runtime·useAeshash is non-zero, unpacks the string header at AX into
// data pointer (AX) and length (CX) and tail-jumps to aeshashbody; otherwise
// tail-jumps to runtime·strhashFallback with the registers untouched.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
|
||||
// AX = ptr to string struct
|
||||
// BX = seed
|
||||
CMPB runtime·useAeshash(SB), $0
|
||||
JEQ noaes
|
||||
// load the length first: the next instruction overwrites AX
MOVQ 8(AX), CX // length of string
|
||||
MOVQ (AX), AX // string data
|
||||
JMP runtime·aeshashbody<>(SB)
|
||||
noaes:
|
||||
JMP runtime·strhashFallback<ABIInternal>(SB)
|
||||
|
||||
// AX: data
|
||||
// BX: hash seed
|
||||
// CX: length
|
||||
// At return: AX = return value
|
||||
// Shared AES hash body, entered by JMP from memhash and strhash.
// It builds a scrambled seed in X0 (unscrambled copy in X1), then dispatches on
// the length to one of six paths: 0-15, 16, 17-32, 33-64, 65-128 or 129+ bytes.
// Paths for 17 bytes and up read the input as 16-byte loads from the start and
// from the end of the buffer, which may overlap.
TEXT runtime·aeshashbody<>(SB),NOSPLIT,$0-0
|
||||
// Fill an SSE register with our seeds.
|
||||
MOVQ BX, X0 // 64 bits of per-table hash seed
|
||||
PINSRW $4, CX, X0 // 16 bits of length
|
||||
PSHUFHW $0, X0, X0 // repeat length 4 times total
|
||||
MOVO X0, X1 // save unscrambled seed
|
||||
PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
|
||||
AESENC X0, X0 // scramble seed
|
||||
|
||||
// dispatch on length (unsigned compares)
CMPQ CX, $16
|
||||
JB aes0to15
|
||||
JE aes16
|
||||
CMPQ CX, $32
|
||||
JBE aes17to32
|
||||
CMPQ CX, $64
|
||||
JBE aes33to64
|
||||
CMPQ CX, $128
|
||||
JBE aes65to128
|
||||
JMP aes129plus
|
||||
|
||||
aes0to15:
|
||||
TESTQ CX, CX
|
||||
JE aes0
|
||||
|
||||
// AX now points 16 bytes past the start of the data
ADDQ $16, AX
|
||||
TESTW $0xff0, AX
|
||||
JE endofpage
|
||||
|
||||
// 16 bytes loaded at this address won't cross
|
||||
// a page boundary, so we can load it directly.
|
||||
MOVOU -16(AX), X1
|
||||
// CX = 2*len, so (AX)(CX*8) below addresses masks<> + 16*len
ADDQ CX, CX
|
||||
MOVQ $masks<>(SB), AX
|
||||
PAND (AX)(CX*8), X1
|
||||
final1:
|
||||
PXOR X0, X1 // xor data with seed
|
||||
AESENC X1, X1 // scramble combo 3 times
|
||||
AESENC X1, X1
|
||||
AESENC X1, X1
|
||||
MOVQ X1, AX // return X1
|
||||
RET
|
||||
|
||||
endofpage:
|
||||
// address ends in 1111xxxx. Might be up against
|
||||
// a page boundary, so load ending at last byte.
|
||||
// Then shift bytes down using pshufb.
|
||||
// AX was advanced by 16 above, so this reads the 16 bytes ending at data+len
MOVOU -32(AX)(CX*1), X1
|
||||
// CX = 2*len, so (AX)(CX*8) below addresses shifts<> + 16*len
ADDQ CX, CX
|
||||
MOVQ $shifts<>(SB), AX
|
||||
PSHUFB (AX)(CX*8), X1
|
||||
JMP final1
|
||||
|
||||
aes0:
|
||||
// Return scrambled input seed
|
||||
AESENC X0, X0
|
||||
MOVQ X0, AX // return X0
|
||||
RET
|
||||
|
||||
aes16:
|
||||
MOVOU (AX), X1
|
||||
JMP final1
|
||||
|
||||
aes17to32:
|
||||
// make second starting seed
|
||||
PXOR runtime·aeskeysched+16(SB), X1
|
||||
AESENC X1, X1
|
||||
|
||||
// load data to be hashed
|
||||
// first 16 bytes and last 16 bytes (may overlap)
MOVOU (AX), X2
|
||||
MOVOU -16(AX)(CX*1), X3
|
||||
|
||||
// xor with seed
|
||||
PXOR X0, X2
|
||||
PXOR X1, X3
|
||||
|
||||
// scramble 3 times
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
|
||||
// combine results
|
||||
PXOR X3, X2
|
||||
MOVQ X2, AX // return X2
|
||||
RET
|
||||
|
||||
aes33to64:
|
||||
// make 3 more starting seeds
|
||||
MOVO X1, X2
|
||||
MOVO X1, X3
|
||||
PXOR runtime·aeskeysched+16(SB), X1
|
||||
PXOR runtime·aeskeysched+32(SB), X2
|
||||
PXOR runtime·aeskeysched+48(SB), X3
|
||||
AESENC X1, X1
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
|
||||
// load first 32 bytes and last 32 bytes (may overlap)
MOVOU (AX), X4
|
||||
MOVOU 16(AX), X5
|
||||
MOVOU -32(AX)(CX*1), X6
|
||||
MOVOU -16(AX)(CX*1), X7
|
||||
|
||||
// xor each data lane with its own seed
PXOR X0, X4
|
||||
PXOR X1, X5
|
||||
PXOR X2, X6
|
||||
PXOR X3, X7
|
||||
|
||||
// scramble each lane 3 times
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
// fold the four lanes into X4
PXOR X6, X4
|
||||
PXOR X7, X5
|
||||
PXOR X5, X4
|
||||
MOVQ X4, AX // return X4
|
||||
RET
|
||||
|
||||
aes65to128:
|
||||
// make 7 more starting seeds
|
||||
MOVO X1, X2
|
||||
MOVO X1, X3
|
||||
MOVO X1, X4
|
||||
MOVO X1, X5
|
||||
MOVO X1, X6
|
||||
MOVO X1, X7
|
||||
PXOR runtime·aeskeysched+16(SB), X1
|
||||
PXOR runtime·aeskeysched+32(SB), X2
|
||||
PXOR runtime·aeskeysched+48(SB), X3
|
||||
PXOR runtime·aeskeysched+64(SB), X4
|
||||
PXOR runtime·aeskeysched+80(SB), X5
|
||||
PXOR runtime·aeskeysched+96(SB), X6
|
||||
PXOR runtime·aeskeysched+112(SB), X7
|
||||
AESENC X1, X1
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
// load data
|
||||
// first 64 bytes and last 64 bytes (may overlap)
MOVOU (AX), X8
|
||||
MOVOU 16(AX), X9
|
||||
MOVOU 32(AX), X10
|
||||
MOVOU 48(AX), X11
|
||||
MOVOU -64(AX)(CX*1), X12
|
||||
MOVOU -48(AX)(CX*1), X13
|
||||
MOVOU -32(AX)(CX*1), X14
|
||||
MOVOU -16(AX)(CX*1), X15
|
||||
|
||||
// xor with seed
|
||||
PXOR X0, X8
|
||||
PXOR X1, X9
|
||||
PXOR X2, X10
|
||||
PXOR X3, X11
|
||||
PXOR X4, X12
|
||||
PXOR X5, X13
|
||||
PXOR X6, X14
|
||||
PXOR X7, X15
|
||||
|
||||
// scramble 3 times
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
|
||||
// combine results
|
||||
PXOR X12, X8
|
||||
PXOR X13, X9
|
||||
PXOR X14, X10
|
||||
PXOR X15, X11
|
||||
PXOR X10, X8
|
||||
PXOR X11, X9
|
||||
PXOR X9, X8
|
||||
// X15 must be zero on return
|
||||
PXOR X15, X15
|
||||
MOVQ X8, AX // return X8
|
||||
RET
|
||||
|
||||
aes129plus:
|
||||
// make 7 more starting seeds
|
||||
MOVO X1, X2
|
||||
MOVO X1, X3
|
||||
MOVO X1, X4
|
||||
MOVO X1, X5
|
||||
MOVO X1, X6
|
||||
MOVO X1, X7
|
||||
PXOR runtime·aeskeysched+16(SB), X1
|
||||
PXOR runtime·aeskeysched+32(SB), X2
|
||||
PXOR runtime·aeskeysched+48(SB), X3
|
||||
PXOR runtime·aeskeysched+64(SB), X4
|
||||
PXOR runtime·aeskeysched+80(SB), X5
|
||||
PXOR runtime·aeskeysched+96(SB), X6
|
||||
PXOR runtime·aeskeysched+112(SB), X7
|
||||
AESENC X1, X1
|
||||
AESENC X2, X2
|
||||
AESENC X3, X3
|
||||
AESENC X4, X4
|
||||
AESENC X5, X5
|
||||
AESENC X6, X6
|
||||
AESENC X7, X7
|
||||
|
||||
// start with last (possibly overlapping) block
|
||||
MOVOU -128(AX)(CX*1), X8
|
||||
MOVOU -112(AX)(CX*1), X9
|
||||
MOVOU -96(AX)(CX*1), X10
|
||||
MOVOU -80(AX)(CX*1), X11
|
||||
MOVOU -64(AX)(CX*1), X12
|
||||
MOVOU -48(AX)(CX*1), X13
|
||||
MOVOU -32(AX)(CX*1), X14
|
||||
MOVOU -16(AX)(CX*1), X15
|
||||
|
||||
// xor in seed
|
||||
PXOR X0, X8
|
||||
PXOR X1, X9
|
||||
PXOR X2, X10
|
||||
PXOR X3, X11
|
||||
PXOR X4, X12
|
||||
PXOR X5, X13
|
||||
PXOR X6, X14
|
||||
PXOR X7, X15
|
||||
|
||||
// compute number of remaining 128-byte blocks
|
||||
// CX = (len-1) >> 7, which is at least 1 on this path (len >= 129)
DECQ CX
|
||||
SHRQ $7, CX
|
||||
|
||||
PCALIGN $16
|
||||
aesloop:
|
||||
// scramble state
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
|
||||
// scramble state, xor in a block
|
||||
// X0-X7 are reused here as scratch for the next 128 bytes of input
MOVOU (AX), X0
|
||||
MOVOU 16(AX), X1
|
||||
MOVOU 32(AX), X2
|
||||
MOVOU 48(AX), X3
|
||||
AESENC X0, X8
|
||||
AESENC X1, X9
|
||||
AESENC X2, X10
|
||||
AESENC X3, X11
|
||||
MOVOU 64(AX), X4
|
||||
MOVOU 80(AX), X5
|
||||
MOVOU 96(AX), X6
|
||||
MOVOU 112(AX), X7
|
||||
AESENC X4, X12
|
||||
AESENC X5, X13
|
||||
AESENC X6, X14
|
||||
AESENC X7, X15
|
||||
|
||||
ADDQ $128, AX
|
||||
DECQ CX
|
||||
JNE aesloop
|
||||
|
||||
// 3 more scrambles to finish
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
AESENC X8, X8
|
||||
AESENC X9, X9
|
||||
AESENC X10, X10
|
||||
AESENC X11, X11
|
||||
AESENC X12, X12
|
||||
AESENC X13, X13
|
||||
AESENC X14, X14
|
||||
AESENC X15, X15
|
||||
|
||||
// fold the eight lanes into X8
PXOR X12, X8
|
||||
PXOR X13, X9
|
||||
PXOR X14, X10
|
||||
PXOR X15, X11
|
||||
PXOR X10, X8
|
||||
PXOR X11, X9
|
||||
PXOR X9, X8
|
||||
// X15 must be zero on return
|
||||
PXOR X15, X15
|
||||
MOVQ X8, AX // return X8
|
||||
RET
|
||||
|
||||
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
|
||||
// ABIInternal for performance.
|
||||
// If runtime·useAeshash is zero, tail-jumps to runtime·memhash32Fallback.
// Otherwise puts the seed in the low quadword of X0 and the 4 data bytes in
// dword lane 2, applies three AESENC rounds keyed from runtime·aeskeysched,
// and returns the low 64 bits of X0 in AX.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
|
||||
// AX = ptr to data
|
||||
// BX = seed
|
||||
CMPB runtime·useAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVQ BX, X0 // X0 = seed
|
||||
PINSRD $2, (AX), X0 // data
|
||||
AESENC runtime·aeskeysched+0(SB), X0
|
||||
AESENC runtime·aeskeysched+16(SB), X0
|
||||
AESENC runtime·aeskeysched+32(SB), X0
|
||||
MOVQ X0, AX // return X0
|
||||
RET
|
||||
noaes:
|
||||
JMP runtime·memhash32Fallback<ABIInternal>(SB)
|
||||
|
||||
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
|
||||
// ABIInternal for performance.
|
||||
// If runtime·useAeshash is zero, tail-jumps to runtime·memhash64Fallback.
// Otherwise puts the seed in the low quadword of X0 and the 8 data bytes in
// the high quadword, applies three AESENC rounds keyed from
// runtime·aeskeysched, and returns the low 64 bits of X0 in AX.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
|
||||
// AX = ptr to data
|
||||
// BX = seed
|
||||
CMPB runtime·useAeshash(SB), $0
|
||||
JEQ noaes
|
||||
MOVQ BX, X0 // X0 = seed
|
||||
PINSRQ $1, (AX), X0 // data
|
||||
AESENC runtime·aeskeysched+0(SB), X0
|
||||
AESENC runtime·aeskeysched+16(SB), X0
|
||||
AESENC runtime·aeskeysched+32(SB), X0
|
||||
MOVQ X0, AX // return X0
|
||||
RET
|
||||
noaes:
|
||||
JMP runtime·memhash64Fallback<ABIInternal>(SB)
|
||||
|
||||
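// masks<> is a table of sixteen 16-byte masks used to get rid of data in the
// high part of the register: the entry at offset 16*n keeps the low n bytes and clears the rest.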
|
||||
DATA masks<>+0x00(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x08(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
|
||||
DATA masks<>+0x18(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
|
||||
DATA masks<>+0x28(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
|
||||
DATA masks<>+0x38(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
|
||||
DATA masks<>+0x48(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
|
||||
DATA masks<>+0x58(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
|
||||
DATA masks<>+0x68(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
|
||||
DATA masks<>+0x78(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0x88(SB)/8, $0x0000000000000000
|
||||
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
|
||||
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
|
||||
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
|
||||
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
|
||||
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
|
||||
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
|
||||
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
|
||||
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
|
||||
GLOBL masks<>(SB),RODATA,$256
|
||||
|
||||
// func checkASM() bool
|
||||
// Reports whether both masks<> and shifts<> start at 16-byte-aligned addresses.
TEXT ·checkASM(SB),NOSPLIT,$0-1
|
||||
// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
|
||||
MOVQ $masks<>(SB), AX
|
||||
MOVQ $shifts<>(SB), BX
|
||||
// OR the two addresses so a single test of the low 4 bits covers both
ORQ BX, AX
|
||||
TESTQ $15, AX
|
||||
// result is 1 when none of the low 4 bits is set
SETEQ ret+0(FP)
|
||||
RET
|
||||
|
||||
// these are arguments to pshufb. They move data down from
|
||||
// the high bytes of the register to the low bytes of the register.
|
||||
// The entry at offset 16*n moves the top n bytes down to the bottom; all other result bytes are zeroed.
|
||||
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
|
||||
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
|
||||
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
|
||||
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
|
||||
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
|
||||
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
|
||||
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
|
||||
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
|
||||
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
|
||||
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
|
||||
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
|
||||
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
|
||||
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
|
||||
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
|
||||
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
|
||||
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
|
||||
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
|
||||
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
|
||||
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
|
||||
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
|
||||
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
|
||||
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
|
||||
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
|
||||
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
|
||||
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
|
||||
GLOBL shifts<>(SB),RODATA,$256
|
||||
|
||||
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
|
||||
// Must obey the gcc calling convention.
|
||||
TEXT _cgo_topofstack(SB),NOSPLIT,$0
|
||||
|
|
|
|||
|
|
@ -809,16 +809,6 @@ TEXT runtime·armPublicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
|
|||
DMB MB_ST
|
||||
RET
|
||||
|
||||
// AES hashing not implemented for ARM
|
||||
// memhash is a stub that tail-jumps straight to runtime·memhashFallback.
TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-16
|
||||
JMP runtime·memhashFallback(SB)
|
||||
// strhash (arm) is a stub that tail-jumps straight to runtime·strhashFallback.
TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-12
|
||||
JMP runtime·strhashFallback(SB)
|
||||
// memhash32 (arm) is a stub that tail-jumps straight to runtime·memhash32Fallback.
TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-12
|
||||
JMP runtime·memhash32Fallback(SB)
|
||||
// memhash64 (arm) is a stub that tail-jumps straight to runtime·memhash64Fallback.
TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-12
|
||||
JMP runtime·memhash64Fallback(SB)
|
||||
|
||||
TEXT runtime·procyieldAsm(SB),NOSPLIT|NOFRAME,$0
|
||||
MOVW cycles+0(FP), R1
|
||||
MOVW $0, R0
|
||||
|
|
@ -887,11 +877,6 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
|
|||
MOVW saver9-4(SP), R9
|
||||
RET
|
||||
|
||||
// checkASM (arm) unconditionally stores 1 (true) into its one-byte result.
TEXT ·checkASM(SB),NOSPLIT,$0-1
|
||||
MOVW $1, R3
|
||||
MOVB R3, ret+0(FP)
|
||||
RET
|
||||
|
||||
// gcWriteBarrier informs the GC about heap pointer writes.
|
||||
//
|
||||
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
|
||||
|
|
|
|||
|
|
@ -674,391 +674,6 @@ CALLFN(·call268435456, 268435456)
|
|||
CALLFN(·call536870912, 536870912)
|
||||
CALLFN(·call1073741824, 1073741824)
|
||||
|
||||
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
|
||||
// If runtime·useAeshash is zero, branches to runtime·memhash32Fallback.
// Otherwise builds V0 from R1 (low 64 bits) and the 32-bit word at (R0)
// (S lane 2), runs three AESE steps against the first 16 bytes of
// runtime·aeskeysched with AESMC after the first two, and returns the low
// 64 bits of V0 in R0.
// NOTE(review): R0 = p and R1 = h is assumed from the ABIInternal signature — confirm.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
MOVB runtime·useAeshash(SB), R10
|
||||
CBZ R10, noaes
|
||||
MOVD $runtime·aeskeysched+0(SB), R3
|
||||
|
||||
// zero V0, load the key material into V2, then insert data and seed lanes
VEOR V0.B16, V0.B16, V0.B16
|
||||
VLD1 (R3), [V2.B16]
|
||||
VLD1 (R0), V0.S[2]
|
||||
VMOV R1, V0.D[0]
|
||||
|
||||
AESE V2.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V2.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V2.B16, V0.B16
|
||||
|
||||
VMOV V0.D[0], R0
|
||||
RET
|
||||
noaes:
|
||||
B runtime·memhash32Fallback<ABIInternal>(SB)
|
||||
|
||||
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
|
||||
// If runtime·useAeshash is zero, branches to runtime·memhash64Fallback.
// Otherwise builds V0 from R1 (low 64 bits) and the 64-bit word at (R0)
// (high 64 bits), runs three AESE steps against the first 16 bytes of
// runtime·aeskeysched with AESMC after the first two, and returns the low
// 64 bits of V0 in R0.
// NOTE(review): R0 = p and R1 = h is assumed from the ABIInternal signature — confirm.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
MOVB runtime·useAeshash(SB), R10
|
||||
CBZ R10, noaes
|
||||
MOVD $runtime·aeskeysched+0(SB), R3
|
||||
|
||||
// zero V0, load the key material into V2, then insert data and seed lanes
VEOR V0.B16, V0.B16, V0.B16
|
||||
VLD1 (R3), [V2.B16]
|
||||
VLD1 (R0), V0.D[1]
|
||||
VMOV R1, V0.D[0]
|
||||
|
||||
AESE V2.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V2.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V2.B16, V0.B16
|
||||
|
||||
VMOV V0.D[0], R0
|
||||
RET
|
||||
noaes:
|
||||
B runtime·memhash64Fallback<ABIInternal>(SB)
|
||||
|
||||
// func memhash(p unsafe.Pointer, h, size uintptr) uintptr
|
||||
// Dispatches on runtime·useAeshash: non-zero branches to aeshashbody (which
// takes data in R0, seed in R1, length in R2), zero branches to
// runtime·memhashFallback. No argument register is modified here.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
|
||||
MOVB runtime·useAeshash(SB), R10
|
||||
CBZ R10, noaes
|
||||
B runtime·aeshashbody<>(SB)
|
||||
noaes:
|
||||
B runtime·memhashFallback<ABIInternal>(SB)
|
||||
|
||||
// func strhash(p unsafe.Pointer, h uintptr) uintptr
|
||||
// When runtime·useAeshash is non-zero, unpacks the two-word string header at
// (R0) into data pointer (R0) and length (R2) and branches to aeshashbody;
// otherwise branches to runtime·strhashFallback with the registers untouched.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
MOVB runtime·useAeshash(SB), R10
|
||||
CBZ R10, noaes
|
||||
LDP (R0), (R0, R2) // string data / length
|
||||
B runtime·aeshashbody<>(SB)
|
||||
noaes:
|
||||
B runtime·strhashFallback<ABIInternal>(SB)
|
||||
|
||||
// R0: data
|
||||
// R1: seed data
|
||||
// R2: length
|
||||
// At return, R0 = return value
|
||||
// Shared AES hash body, entered by branch from memhash and strhash.
// V30 holds {seed, length}; V0 is the first 16 bytes of runtime·aeskeysched
// mixed with V30. The code then dispatches on the length to one of six paths:
// 0-15, 16, 17-32, 33-64, 65-128 or 129+ bytes. Paths for 17 bytes and up read
// the input from the start and from the end of the buffer, which may overlap.
TEXT runtime·aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
|
||||
VEOR V30.B16, V30.B16, V30.B16
|
||||
VMOV R1, V30.D[0]
|
||||
VMOV R2, V30.D[1] // load length into seed
|
||||
|
||||
MOVD $runtime·aeskeysched+0(SB), R4
|
||||
// post-increment: R4 now points at aeskeysched+16
VLD1.P 16(R4), [V0.B16]
|
||||
AESE V30.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
// dispatch on length (unsigned compares)
CMP $16, R2
|
||||
BLO aes0to15
|
||||
BEQ aes16
|
||||
CMP $32, R2
|
||||
BLS aes17to32
|
||||
CMP $64, R2
|
||||
BLS aes33to64
|
||||
CMP $128, R2
|
||||
BLS aes65to128
|
||||
B aes129plus
|
||||
|
||||
aes0to15:
|
||||
CBZ R2, aes0
|
||||
// assemble 1-15 bytes into V2, one piece per set bit of the length (8, 4, 2, 1)
VEOR V2.B16, V2.B16, V2.B16
|
||||
TBZ $3, R2, less_than_8
|
||||
VLD1.P 8(R0), V2.D[0]
|
||||
|
||||
less_than_8:
|
||||
TBZ $2, R2, less_than_4
|
||||
VLD1.P 4(R0), V2.S[2]
|
||||
|
||||
less_than_4:
|
||||
TBZ $1, R2, less_than_2
|
||||
VLD1.P 2(R0), V2.H[6]
|
||||
|
||||
less_than_2:
|
||||
TBZ $0, R2, done
|
||||
VLD1 (R0), V2.B[14]
|
||||
done:
|
||||
// three AESE+AESMC steps on the data, keyed with the scrambled seed in V0
AESE V0.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V0.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V0.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
|
||||
VMOV V2.D[0], R0
|
||||
RET
|
||||
|
||||
aes0:
|
||||
// zero length: return the low 64 bits of the scrambled seed
VMOV V0.D[0], R0
|
||||
RET
|
||||
|
||||
aes16:
|
||||
VLD1 (R0), [V2.B16]
|
||||
B done
|
||||
|
||||
aes17to32:
|
||||
// make second seed
|
||||
VLD1 (R4), [V1.B16]
|
||||
AESE V30.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
// R10 = len-16: the post-indexed load reads the first 16 bytes and advances
// R0 so the following load reads the last 16 bytes
SUB $16, R2, R10
|
||||
VLD1.P (R0)(R10), [V2.B16]
|
||||
VLD1 (R0), [V3.B16]
|
||||
|
||||
AESE V0.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V1.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
|
||||
AESE V0.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V1.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
|
||||
AESE V0.B16, V2.B16
|
||||
AESE V1.B16, V3.B16
|
||||
|
||||
// combine the two lanes
VEOR V3.B16, V2.B16, V2.B16
|
||||
|
||||
VMOV V2.D[0], R0
|
||||
RET
|
||||
|
||||
aes33to64:
|
||||
// make 3 more seeds from the next 48 bytes of aeskeysched
VLD1 (R4), [V1.B16, V2.B16, V3.B16]
|
||||
AESE V30.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V30.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V30.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
SUB $32, R2, R10
|
||||
|
||||
// first 32 bytes, then (after R0 += len-32) the last 32 bytes
VLD1.P (R0)(R10), [V4.B16, V5.B16]
|
||||
VLD1 (R0), [V6.B16, V7.B16]
|
||||
|
||||
AESE V0.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V1.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V2.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V3.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
AESE V0.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V1.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V2.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V3.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
AESE V0.B16, V4.B16
|
||||
AESE V1.B16, V5.B16
|
||||
AESE V2.B16, V6.B16
|
||||
AESE V3.B16, V7.B16
|
||||
|
||||
// fold the four lanes into V4
VEOR V6.B16, V4.B16, V4.B16
|
||||
VEOR V7.B16, V5.B16, V5.B16
|
||||
VEOR V5.B16, V4.B16, V4.B16
|
||||
|
||||
VMOV V4.D[0], R0
|
||||
RET
|
||||
|
||||
aes65to128:
|
||||
// make 7 more seeds from the next 112 bytes of aeskeysched
VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
|
||||
VLD1 (R4), [V5.B16, V6.B16, V7.B16]
|
||||
AESE V30.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V30.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V30.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
AESE V30.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V30.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V30.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V30.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
// first 64 bytes, then (after R0 += len-64) the last 64 bytes
SUB $64, R2, R10
|
||||
VLD1.P (R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
|
||||
VLD1 (R0), [V12.B16, V13.B16, V14.B16, V15.B16]
|
||||
AESE V0.B16, V8.B16
|
||||
AESMC V8.B16, V8.B16
|
||||
AESE V1.B16, V9.B16
|
||||
AESMC V9.B16, V9.B16
|
||||
AESE V2.B16, V10.B16
|
||||
AESMC V10.B16, V10.B16
|
||||
AESE V3.B16, V11.B16
|
||||
AESMC V11.B16, V11.B16
|
||||
AESE V4.B16, V12.B16
|
||||
AESMC V12.B16, V12.B16
|
||||
AESE V5.B16, V13.B16
|
||||
AESMC V13.B16, V13.B16
|
||||
AESE V6.B16, V14.B16
|
||||
AESMC V14.B16, V14.B16
|
||||
AESE V7.B16, V15.B16
|
||||
AESMC V15.B16, V15.B16
|
||||
|
||||
AESE V0.B16, V8.B16
|
||||
AESMC V8.B16, V8.B16
|
||||
AESE V1.B16, V9.B16
|
||||
AESMC V9.B16, V9.B16
|
||||
AESE V2.B16, V10.B16
|
||||
AESMC V10.B16, V10.B16
|
||||
AESE V3.B16, V11.B16
|
||||
AESMC V11.B16, V11.B16
|
||||
AESE V4.B16, V12.B16
|
||||
AESMC V12.B16, V12.B16
|
||||
AESE V5.B16, V13.B16
|
||||
AESMC V13.B16, V13.B16
|
||||
AESE V6.B16, V14.B16
|
||||
AESMC V14.B16, V14.B16
|
||||
AESE V7.B16, V15.B16
|
||||
AESMC V15.B16, V15.B16
|
||||
|
||||
AESE V0.B16, V8.B16
|
||||
AESE V1.B16, V9.B16
|
||||
AESE V2.B16, V10.B16
|
||||
AESE V3.B16, V11.B16
|
||||
AESE V4.B16, V12.B16
|
||||
AESE V5.B16, V13.B16
|
||||
AESE V6.B16, V14.B16
|
||||
AESE V7.B16, V15.B16
|
||||
|
||||
// fold the eight lanes into V8
VEOR V12.B16, V8.B16, V8.B16
|
||||
VEOR V13.B16, V9.B16, V9.B16
|
||||
VEOR V14.B16, V10.B16, V10.B16
|
||||
VEOR V15.B16, V11.B16, V11.B16
|
||||
VEOR V10.B16, V8.B16, V8.B16
|
||||
VEOR V11.B16, V9.B16, V9.B16
|
||||
VEOR V9.B16, V8.B16, V8.B16
|
||||
|
||||
VMOV V8.D[0], R0
|
||||
RET
|
||||
|
||||
aes129plus:
|
||||
PRFM (R0), PLDL1KEEP
|
||||
// make 7 more seeds from the next 112 bytes of aeskeysched
VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
|
||||
VLD1 (R4), [V5.B16, V6.B16, V7.B16]
|
||||
AESE V30.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V30.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V30.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
AESE V30.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V30.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V30.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V30.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
// R10 = data+len-128: load the last (possibly overlapping) 128 bytes into V8-V15
ADD R0, R2, R10
|
||||
SUB $128, R10, R10
|
||||
VLD1.P 64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
|
||||
VLD1 (R10), [V12.B16, V13.B16, V14.B16, V15.B16]
|
||||
// R2 = (len-1) >> 7, the loop trip count (at least 1 since len >= 129)
SUB $1, R2, R2
|
||||
LSR $7, R2, R2
|
||||
|
||||
aesloop:
|
||||
// mix the block currently in V8-V15 into the state V0-V7
AESE V8.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V9.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V10.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V11.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
AESE V12.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V13.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V14.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
// load the next 128 bytes from the front (two 64-byte halves) and mix them in
VLD1.P 64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
|
||||
AESE V8.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V9.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V10.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V11.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
|
||||
VLD1.P 64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
|
||||
AESE V12.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V13.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V14.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
SUB $1, R2, R2
|
||||
CBNZ R2, aesloop
|
||||
|
||||
// three finishing passes; the last one omits AESMC
AESE V8.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V9.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V10.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V11.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
AESE V12.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V13.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V14.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
AESE V8.B16, V0.B16
|
||||
AESMC V0.B16, V0.B16
|
||||
AESE V9.B16, V1.B16
|
||||
AESMC V1.B16, V1.B16
|
||||
AESE V10.B16, V2.B16
|
||||
AESMC V2.B16, V2.B16
|
||||
AESE V11.B16, V3.B16
|
||||
AESMC V3.B16, V3.B16
|
||||
AESE V12.B16, V4.B16
|
||||
AESMC V4.B16, V4.B16
|
||||
AESE V13.B16, V5.B16
|
||||
AESMC V5.B16, V5.B16
|
||||
AESE V14.B16, V6.B16
|
||||
AESMC V6.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
AESMC V7.B16, V7.B16
|
||||
|
||||
AESE V8.B16, V0.B16
|
||||
AESE V9.B16, V1.B16
|
||||
AESE V10.B16, V2.B16
|
||||
AESE V11.B16, V3.B16
|
||||
AESE V12.B16, V4.B16
|
||||
AESE V13.B16, V5.B16
|
||||
AESE V14.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
|
||||
// fold the eight state lanes into V0
VEOR V0.B16, V1.B16, V0.B16
|
||||
VEOR V2.B16, V3.B16, V2.B16
|
||||
VEOR V4.B16, V5.B16, V4.B16
|
||||
VEOR V6.B16, V7.B16, V6.B16
|
||||
VEOR V0.B16, V2.B16, V0.B16
|
||||
VEOR V4.B16, V6.B16, V4.B16
|
||||
VEOR V4.B16, V0.B16, V0.B16
|
||||
|
||||
VMOV V0.D[0], R0
|
||||
RET
|
||||
|
||||
// The Arm architecture provides a user space accessible counter-timer which
|
||||
// is incremented at a fixed but machine-specific rate. Software can (spin)
|
||||
// wait until the counter-timer reaches some desired value.
|
||||
|
|
@ -1435,11 +1050,6 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
|
|||
ADD $0x10, RSP
|
||||
RET
|
||||
|
||||
// checkASM unconditionally reports success: it writes the constant 1 (true)
// into the single-byte bool result slot and returns.
TEXT ·checkASM(SB),NOSPLIT,$0-1
|
||||
MOVW $1, R3 // R3 = 1 (true)
|
||||
MOVB R3, ret+0(FP) // byte store: the result is a 1-byte bool
|
||||
RET
|
||||
|
||||
// gcWriteBarrier informs the GC about heap pointer writes.
|
||||
//
|
||||
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
|
||||
|
|
|
|||
|
|
@ -725,16 +725,6 @@ TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
|
|||
MOVW (R0), R0
|
||||
UNDEF
|
||||
|
||||
// AES hashing not implemented for loong64.
// Each hash entry point below is a frameless stub consisting of a single
// jump to its *Fallback counterpart; the incoming arguments are not touched.
|
||||
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
|
||||
JMP runtime·memhashFallback<ABIInternal>(SB)
|
||||
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·strhashFallback<ABIInternal>(SB)
|
||||
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash32Fallback<ABIInternal>(SB)
|
||||
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash64Fallback<ABIInternal>(SB)
|
||||
|
||||
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
|
||||
// Must obey the gcc calling convention.
|
||||
TEXT _cgo_topofstack(SB),NOSPLIT,$16
|
||||
|
|
@ -771,11 +761,6 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
|
|||
ADDV $0x10, R3
|
||||
RET
|
||||
|
||||
// checkASM unconditionally reports success: it writes the constant 1 (true)
// into the single-byte bool result slot and returns.
TEXT ·checkASM(SB),NOSPLIT,$0-1
|
||||
MOVW $1, R19 // R19 = 1 (true)
|
||||
MOVB R19, ret+0(FP) // byte store: the result is a 1-byte bool
|
||||
RET
|
||||
|
||||
// spillArgs stores return values from registers to a *internal/abi.RegArgs in R25.
|
||||
TEXT ·spillArgs(SB),NOSPLIT,$0-0
|
||||
MOVV R4, (0*8)(R25)
|
||||
|
|
|
|||
|
|
@ -646,16 +646,6 @@ TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
|
|||
MOVW (R0), R0
|
||||
UNDEF
|
||||
|
||||
// AES hashing not implemented for mips64.
// Each hash entry point below is a frameless stub consisting of a single
// jump to its *Fallback counterpart; the incoming arguments are not touched.
|
||||
TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32
|
||||
JMP runtime·memhashFallback(SB)
|
||||
TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·strhashFallback(SB)
|
||||
TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash32Fallback(SB)
|
||||
TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash64Fallback(SB)
|
||||
|
||||
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
|
||||
// Must obey the gcc calling convention.
|
||||
TEXT _cgo_topofstack(SB),NOSPLIT,$16
|
||||
|
|
@ -681,11 +671,6 @@ TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
|
|||
// traceback from goexit1 must hit code range of goexit
|
||||
NOR R0, R0 // NOP
|
||||
|
||||
// checkASM unconditionally reports success: it writes the constant 1 (true)
// into the single-byte bool result slot and returns.
TEXT ·checkASM(SB),NOSPLIT,$0-1
|
||||
MOVW $1, R1 // R1 = 1 (true)
|
||||
MOVB R1, ret+0(FP) // byte store: the result is a 1-byte bool
|
||||
RET
|
||||
|
||||
// gcWriteBarrier informs the GC about heap pointer writes.
|
||||
//
|
||||
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
|
||||
|
|
|
|||
|
|
@ -621,16 +621,6 @@ TEXT setg_gcc<>(SB),NOSPLIT,$0
|
|||
TEXT runtime·abort(SB),NOSPLIT,$0-0
|
||||
UNDEF
|
||||
|
||||
// AES hashing not implemented for mips.
// Each hash entry point below is a frameless stub consisting of a single
// jump to its *Fallback counterpart; the incoming arguments are not touched.
|
||||
TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-16
|
||||
JMP runtime·memhashFallback(SB)
|
||||
TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-12
|
||||
JMP runtime·strhashFallback(SB)
|
||||
TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-12
|
||||
JMP runtime·memhash32Fallback(SB)
|
||||
TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-12
|
||||
JMP runtime·memhash64Fallback(SB)
|
||||
|
||||
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
|
||||
// Must obey the gcc calling convention.
|
||||
TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
|
||||
|
|
@ -659,11 +649,6 @@ TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
|
|||
// traceback from goexit1 must hit code range of goexit
|
||||
NOR R0, R0 // NOP
|
||||
|
||||
// checkASM unconditionally reports success: it writes the constant 1 (true)
// into the single-byte bool result slot and returns.
TEXT ·checkASM(SB),NOSPLIT,$0-1
|
||||
MOVW $1, R1 // R1 = 1 (true)
|
||||
MOVB R1, ret+0(FP) // byte store: the result is a 1-byte bool
|
||||
RET
|
||||
|
||||
// gcWriteBarrier informs the GC about heap pointer writes.
|
||||
//
|
||||
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
|
||||
|
|
|
|||
|
|
@ -1019,16 +1019,6 @@ TEXT runtime·unspillArgs(SB),NOSPLIT,$0-0
|
|||
FMOVD 184(R20), F12
|
||||
RET
|
||||
|
||||
// AES hashing not implemented for ppc64.
// Each hash entry point below is a frameless stub consisting of a single
// jump to its *Fallback counterpart; the incoming arguments are not touched.
|
||||
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
|
||||
JMP runtime·memhashFallback<ABIInternal>(SB)
|
||||
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·strhashFallback<ABIInternal>(SB)
|
||||
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash32Fallback<ABIInternal>(SB)
|
||||
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash64Fallback<ABIInternal>(SB)
|
||||
|
||||
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
|
||||
// Must obey the gcc calling convention.
|
||||
#ifdef GOOS_aix
|
||||
|
|
@ -1088,11 +1078,6 @@ TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
|
|||
ADD $8, R1
|
||||
RET
|
||||
|
||||
// checkASM unconditionally reports success: it writes the constant 1 (true)
// into the single-byte bool result slot and returns.
TEXT ·checkASM(SB),NOSPLIT,$0-1
|
||||
MOVW $1, R3 // R3 = 1 (true)
|
||||
MOVB R3, ret+0(FP) // byte store: the result is a 1-byte bool
|
||||
RET
|
||||
|
||||
// gcWriteBarrier informs the GC about heap pointer writes.
|
||||
//
|
||||
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
|
||||
|
|
|
|||
|
|
@ -274,16 +274,6 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
|
|||
MOV ZERO, CTXT
|
||||
JMP runtime·morestack(SB)
|
||||
|
||||
// AES hashing not implemented for riscv64.
// Each hash entry point below is a frameless stub consisting of a single
// jump to its *Fallback counterpart; the incoming arguments are not touched.
|
||||
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
|
||||
JMP runtime·memhashFallback<ABIInternal>(SB)
|
||||
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·strhashFallback<ABIInternal>(SB)
|
||||
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash32Fallback<ABIInternal>(SB)
|
||||
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash64Fallback<ABIInternal>(SB)
|
||||
|
||||
// restore state from Gobuf; longjmp
|
||||
|
||||
// func gogo(buf *gobuf)
|
||||
|
|
@ -739,11 +729,6 @@ TEXT runtime·setg(SB), NOSPLIT, $0-8
|
|||
CALL runtime·save_g(SB)
|
||||
RET
|
||||
|
||||
// checkASM unconditionally reports success: it writes the constant 1 (true)
// into the single-byte bool result slot and returns.
TEXT ·checkASM(SB),NOSPLIT,$0-1
|
||||
MOV $1, T0 // T0 = 1 (true)
|
||||
MOVB T0, ret+0(FP) // byte store: the frame declares a 1-byte result ($0-1), so a full-width MOV would write past the bool
|
||||
RET
|
||||
|
||||
// spillArgs stores return values from registers to a *internal/abi.RegArgs in X25.
|
||||
TEXT ·spillArgs(SB),NOSPLIT,$0-0
|
||||
MOV X10, (0*8)(X25)
|
||||
|
|
|
|||
|
|
@ -842,16 +842,6 @@ TEXT runtime·unspillArgs(SB),NOSPLIT,$0-0
|
|||
FMOVD 184(R10), F15
|
||||
RET
|
||||
|
||||
// AES hashing not implemented for s390x.
// Each hash entry point below is a frameless stub consisting of a single
// jump to its *Fallback counterpart; the incoming arguments are not touched.
|
||||
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
|
||||
JMP runtime·memhashFallback<ABIInternal>(SB)
|
||||
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·strhashFallback<ABIInternal>(SB)
|
||||
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash32Fallback<ABIInternal>(SB)
|
||||
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash64Fallback<ABIInternal>(SB)
|
||||
|
||||
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
|
||||
// Must obey the gcc calling convention.
|
||||
TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
|
||||
|
|
@ -904,10 +894,6 @@ TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
|
|||
LMG 48(R15), R6, R15
|
||||
RET
|
||||
|
||||
// checkASM unconditionally reports success: it writes the constant 1 (true)
// into the single-byte bool result slot and returns.
TEXT ·checkASM(SB),NOSPLIT,$0-1
|
||||
MOVB $1, ret+0(FP) // store the immediate 1 (true) directly as the 1-byte result
|
||||
RET
|
||||
|
||||
// gcWriteBarrier informs the GC about heap pointer writes.
|
||||
//
|
||||
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
|
||||
|
|
|
|||
|
|
@ -32,11 +32,6 @@ TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
|
|||
DATA runtime·mainPC+0(SB)/8,$runtime·main(SB)
|
||||
GLOBL runtime·mainPC(SB),RODATA,$8
|
||||
|
||||
// func checkASM() bool
//
// checkASM unconditionally reports success: it writes the constant 1 (true)
// into the single-byte bool result slot and returns.
|
||||
TEXT ·checkASM(SB), NOSPLIT, $0-1
|
||||
MOVB $1, ret+0(FP) // store the immediate 1 (true) directly as the 1-byte result
|
||||
RET
|
||||
|
||||
TEXT runtime·gogo(SB), NOSPLIT, $0-8
|
||||
MOVD buf+0(FP), R0
|
||||
MOVD gobuf_g(R0), R1
|
||||
|
|
@ -183,16 +178,6 @@ TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
|
|||
TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
|
||||
UNDEF
|
||||
|
||||
// AES hashing not implemented for wasm.
// Each hash entry point below is a frameless stub consisting of a single
// jump to its *Fallback counterpart; the incoming arguments are not touched.
|
||||
TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32
|
||||
JMP runtime·memhashFallback(SB)
|
||||
TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·strhashFallback(SB)
|
||||
TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash32Fallback(SB)
|
||||
TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24
|
||||
JMP runtime·memhash64Fallback(SB)
|
||||
|
||||
TEXT runtime·asminit(SB), NOSPLIT, $0-0
|
||||
// No per-thread init.
|
||||
RET
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import (
|
|||
"internal/goos"
|
||||
"internal/runtime/atomic"
|
||||
"internal/runtime/gc"
|
||||
"internal/runtime/maps"
|
||||
"internal/runtime/sys"
|
||||
"unsafe"
|
||||
)
|
||||
|
|
@ -211,7 +212,7 @@ var (
|
|||
IfaceHash = ifaceHash
|
||||
)
|
||||
|
||||
var UseAeshash = &useAeshash
|
||||
var UseAeshash = &maps.UseAeshash
|
||||
|
||||
func MemclrBytes(b []byte) {
|
||||
s := (*slice)(unsafe.Pointer(&b))
|
||||
|
|
@ -254,7 +255,6 @@ func SetTracebackEnv(level string) {
|
|||
traceback_env = traceback_cache
|
||||
}
|
||||
|
||||
var ReadUnaligned32 = readUnaligned32
|
||||
var ReadUnaligned64 = readUnaligned64
|
||||
|
||||
func CountPagesInUse() (pagesInUse, counted uintptr) {
|
||||
|
|
|
|||
|
|
@ -1077,7 +1077,7 @@ func TestMemHashGlobalSeed(t *testing.T) {
|
|||
|
||||
testenv.MustHaveExec(t)
|
||||
|
||||
// aeshash and memhashFallback use separate per-process seeds, so test
|
||||
// aeshash and memHashFallback use separate per-process seeds, so test
|
||||
// both.
|
||||
t.Run("aes", func(t *testing.T) {
|
||||
if !*runtime.UseAeshash {
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import (
|
|||
"internal/goos"
|
||||
"internal/runtime/atomic"
|
||||
"internal/runtime/exithook"
|
||||
"internal/runtime/maps"
|
||||
"internal/runtime/sys"
|
||||
"internal/strconv"
|
||||
"internal/stringslite"
|
||||
|
|
@ -878,10 +879,10 @@ func schedinit() {
|
|||
ticks.init() // run as early as possible
|
||||
moduledataverify()
|
||||
stackinit()
|
||||
randinit() // must run before mallocinit, alginit, mcommoninit
|
||||
randinit() // must run before mallocinit, AlgInit, mcommoninit
|
||||
mallocinit()
|
||||
cpuinit(godebug) // must run before alginit
|
||||
alginit() // maps, hash, rand must not be used before this call
|
||||
cpuinit(godebug) // must run before AlgInit
|
||||
maps.AlgInit() // maps, hash, rand must not be used before this call
|
||||
mcommoninit(gp.m, -1)
|
||||
modulesinit() // provides activeModules
|
||||
typelinksinit() // uses maps, activeModules
|
||||
|
|
|
|||
|
|
@ -117,7 +117,10 @@ func allZero(b []byte) bool {
|
|||
return acc == 0
|
||||
}
|
||||
|
||||
// Used in internal/runtime/maps
|
||||
// bootstrapRand returns a random uint64 from the global random generator.
|
||||
//
|
||||
//go:linknamestd bootstrapRand
|
||||
func bootstrapRand() uint64 {
|
||||
lock(&globalRand.lock)
|
||||
if !globalRand.init {
|
||||
|
|
|
|||
|
|
@ -287,10 +287,6 @@ func check() {
|
|||
if fixedStack != round2(fixedStack) {
|
||||
throw("FixedStack is not power-of-2")
|
||||
}
|
||||
|
||||
if !checkASM() {
|
||||
throw("assembly checks failed")
|
||||
}
|
||||
}
|
||||
|
||||
type dbgVar struct {
|
||||
|
|
|
|||
|
|
@ -395,9 +395,6 @@ func divRoundUp(n, a uintptr) uintptr {
|
|||
return (n + a - 1) / a
|
||||
}
|
||||
|
||||
// checkASM reports whether assembly runtime checks have passed.
|
||||
func checkASM() bool
|
||||
|
||||
func memequal_varlen(a, b unsafe.Pointer) bool
|
||||
|
||||
// bool2int returns 0 if x is false or 1 if x is true.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue