runtime/,internal/runtime/maps: move hashing function implemented in GOASM to maps package

This is groundwork for future CLs that will rewrite the Go assembly implementations using SIMD intrinsics. Doing so will allow the hashing functions to be inlined into the map functions, improving their performance. Change-Id: I5aef1da0d11a9308ca0a22900671f6f47dc820a8 Reviewed-on: https://go-review.googlesource.com/c/go/+/770581 Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Keith Randall <khr@google.com> LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2026-06-27 19:30:52 +00:00 · 2026-04-24 19:27:10 +03:00 · 2026-04-24 19:27:10 +03:00 · 8594bf4621
commit 8594bf4621
parent 464dc3f344
34 changed files with 1526 additions and 1519 deletions
--- a/src/cmd/compile/internal/test/inl_test.go
+++ b/src/cmd/compile/internal/test/inl_test.go
@ -51,7 +51,6 @@ func TestIntendedInlining(t *testing.T) {
 			"noescape",
 			"pcvalueCacheKey",
 			"rand32",
-			"readUnaligned32",
 			"readUnaligned64",
 			"releasem",
 			"roundupsize",
@ -113,6 +112,10 @@ func TestIntendedInlining(t *testing.T) {
 		"internal/runtime/math": {
 			"MulUintptr",
 		},
+		"internal/runtime/maps": {
+			"readUnaligned32",
+			"readUnaligned64",
+		},
 		"internal/runtime/sys": {},
 		"compress/flate": {
 			"(*dictDecoder).tryWriteCopy",
@ -262,7 +265,7 @@ func TestIntendedInlining(t *testing.T) {
 	}
 	if bits.UintSize == 64 {
 		// mix is only defined on 64-bit architectures
-		want["runtime"] = append(want["runtime"], "mix")
+		want["internal/runtime/maps"] = append(want["internal/runtime/maps"], "mix")
 		// (*Bool).CompareAndSwap is just over budget on 32-bit systems (386, arm).
 		want["sync/atomic"] = append(want["sync/atomic"], "(*Bool).CompareAndSwap")
 	}
--- a/src/cmd/internal/objabi/pkgspecial.go
+++ b/src/cmd/internal/objabi/pkgspecial.go
@ -99,6 +99,7 @@ var allowAsmABIPkgs = []string{
 	"internal/runtime/syscall/linux",
 	"internal/runtime/syscall/windows",
 	"internal/runtime/startlinetest",
+	"internal/runtime/maps",
 }

 // LookupPkgSpecial returns special build properties for the given package path.
--- a/src/internal/runtime/maps/memhash_386.s
+++ b/src/internal/runtime/maps/memhash_386.s
@ -0,0 +1,431 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// MemHash32 hashes the 4 bytes at p with seed h using AES hardware instructions, or via memHash32Fallback when UseAeshash is false.
+TEXT ·MemHash32(SB),NOSPLIT,$0-12
+	CMPB	·UseAeshash(SB), $0
+	JEQ	noaes	// UseAeshash is false: take the fallback
+	MOVL	p+0(FP), AX	// ptr to data
+	MOVL	h+4(FP), X0	// seed
+	PINSRD	$1, (AX), X0	// data
+	AESENC	·aeskeysched+0(SB), X0	// three AES rounds keyed from aeskeysched
+	AESENC	·aeskeysched+16(SB), X0
+	AESENC	·aeskeysched+32(SB), X0
+	MOVL	X0, ret+8(FP)	// return the low 32 bits of the state
+	RET
+noaes:
+	JMP	·memHash32Fallback(SB)	// jump, not call: the fallback sees this function's arguments
+
+TEXT ·MemHash64(SB),NOSPLIT,$0-12
+	CMPB	·UseAeshash(SB), $0
+	JEQ	noaes	// UseAeshash is false: take the fallback
+	MOVL	p+0(FP), AX	// ptr to data
+	MOVQ	(AX), X0	// 8 bytes of data
+	PINSRD	$2, h+4(FP), X0	// 32-bit seed goes in lane 2, above the data
+	AESENC	·aeskeysched+0(SB), X0	// three AES rounds keyed from aeskeysched
+	AESENC	·aeskeysched+16(SB), X0
+	AESENC	·aeskeysched+32(SB), X0
+	MOVL	X0, ret+8(FP)	// return the low 32 bits of the state
+	RET
+noaes:
+	JMP	·memHash64Fallback(SB)	// jump, not call: the fallback sees this function's arguments
+
+TEXT ·MemHash(SB),NOSPLIT,$0-16
+	CMPB	·UseAeshash(SB), $0
+	JEQ	noaes	// UseAeshash is false: take the fallback
+	MOVL	p+0(FP), AX	// ptr to data
+	MOVL	s+8(FP), BX	// size
+	LEAL	ret+12(FP), DX	// address of the result slot, which aeshashbody writes through DX
+	JMP	·aeshashbody<>(SB)	// aeshashbody loads the seed itself from h+4(FP)
+noaes:
+	JMP	·memHashFallback(SB)
+
+TEXT ·StrHash(SB),NOSPLIT,$0-12
+	CMPB	·UseAeshash(SB), $0
+	JEQ	noaes	// UseAeshash is false: take the fallback
+	MOVL	p+0(FP), AX	// ptr to string object
+	MOVL	4(AX), BX	// length of string
+	MOVL	(AX), AX	// string data (loaded last because it overwrites the object pointer)
+	LEAL	ret+8(FP), DX	// address of the result slot, which aeshashbody writes through DX
+	JMP	·aeshashbody<>(SB)	// aeshashbody loads the seed itself from h+4(FP)
+noaes:
+	JMP	·strHashFallback(SB)
+
+// AX: data
+// BX: length
+// DX: address to put return value (the seed is not passed in a register; it is read from the caller's h+4(FP))
+TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
+	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
+	PINSRW	$4, BX, X0	            // 16 bits of length
+	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
+	MOVO	X0, X1                      // save unscrambled seed
+	PXOR	·aeskeysched(SB), X0 // xor in per-process seed
+	AESENC	X0, X0                      // scramble seed
+
+	CMPL	BX, $16
+	JB	aes0to15
+	JE	aes16
+	CMPL	BX, $32
+	JBE	aes17to32
+	CMPL	BX, $64
+	JBE	aes33to64
+	JMP	aes65plus
+
+aes0to15:
+	TESTL	BX, BX
+	JE	aes0
+
+	ADDL	$16, AX
+	TESTW	$0xff0, AX
+	JE	endofpage
+
+	// 16 bytes loaded at this address won't cross
+	// a page boundary, so we can load it directly.
+	MOVOU	-16(AX), X1
+	ADDL	BX, BX
+	PAND	masks<>(SB)(BX*8), X1
+
+final1:
+	PXOR	X0, X1	// xor data with seed
+	AESENC	X1, X1  // scramble combo 3 times
+	AESENC	X1, X1
+	AESENC	X1, X1
+	MOVL	X1, (DX)
+	RET
+
+endofpage:
+	// address ends in 1111xxxx. Might be up against
+	// a page boundary, so load ending at last byte.
+	// Then shift bytes down using pshufb.
+	MOVOU	-32(AX)(BX*1), X1
+	ADDL	BX, BX
+	PSHUFB	shifts<>(SB)(BX*8), X1
+	JMP	final1
+
+aes0:
+	// Return scrambled input seed
+	AESENC	X0, X0
+	MOVL	X0, (DX)
+	RET
+
+aes16:
+	MOVOU	(AX), X1
+	JMP	final1
+
+aes17to32:
+	// make second starting seed
+	PXOR	·aeskeysched+16(SB), X1
+	AESENC	X1, X1
+
+	// load data to be hashed
+	MOVOU	(AX), X2
+	MOVOU	-16(AX)(BX*1), X3
+
+	// xor with seed
+	PXOR	X0, X2
+	PXOR	X1, X3
+
+	// scramble 3 times
+	AESENC	X2, X2
+	AESENC	X3, X3
+	AESENC	X2, X2
+	AESENC	X3, X3
+	AESENC	X2, X2
+	AESENC	X3, X3
+
+	// combine results
+	PXOR	X3, X2
+	MOVL	X2, (DX)
+	RET
+
+aes33to64:
+	// make 3 more starting seeds
+	MOVO	X1, X2
+	MOVO	X1, X3
+	PXOR	·aeskeysched+16(SB), X1
+	PXOR	·aeskeysched+32(SB), X2
+	PXOR	·aeskeysched+48(SB), X3
+	AESENC	X1, X1
+	AESENC	X2, X2
+	AESENC	X3, X3
+
+	MOVOU	(AX), X4
+	MOVOU	16(AX), X5
+	MOVOU	-32(AX)(BX*1), X6
+	MOVOU	-16(AX)(BX*1), X7
+
+	PXOR	X0, X4
+	PXOR	X1, X5
+	PXOR	X2, X6
+	PXOR	X3, X7
+
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	PXOR	X6, X4
+	PXOR	X7, X5
+	PXOR	X5, X4
+	MOVL	X4, (DX)
+	RET
+
+aes65plus:
+	// make 3 more starting seeds
+	MOVO	X1, X2
+	MOVO	X1, X3
+	PXOR	·aeskeysched+16(SB), X1
+	PXOR	·aeskeysched+32(SB), X2
+	PXOR	·aeskeysched+48(SB), X3
+	AESENC	X1, X1
+	AESENC	X2, X2
+	AESENC	X3, X3
+
+	// start with last (possibly overlapping) block
+	MOVOU	-64(AX)(BX*1), X4
+	MOVOU	-48(AX)(BX*1), X5
+	MOVOU	-32(AX)(BX*1), X6
+	MOVOU	-16(AX)(BX*1), X7
+
+	// scramble state once
+	AESENC	X0, X4
+	AESENC	X1, X5
+	AESENC	X2, X6
+	AESENC	X3, X7
+
+	// compute number of remaining 64-byte blocks
+	DECL	BX
+	SHRL	$6, BX
+
+aesloop:
+	// scramble state, xor in a block
+	MOVOU	(AX), X0
+	MOVOU	16(AX), X1
+	MOVOU	32(AX), X2
+	MOVOU	48(AX), X3
+	AESENC	X0, X4
+	AESENC	X1, X5
+	AESENC	X2, X6
+	AESENC	X3, X7
+
+	// scramble state
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	ADDL	$64, AX
+	DECL	BX
+	JNE	aesloop
+
+	// 3 more scrambles to finish
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	PXOR	X6, X4
+	PXOR	X7, X5
+	PXOR	X5, X4
+	MOVL	X4, (DX)
+	RET
+
+// simple mask to get rid of data in the high part of the register.
+DATA masks<>+0x00(SB)/4, $0x00000000
+DATA masks<>+0x04(SB)/4, $0x00000000
+DATA masks<>+0x08(SB)/4, $0x00000000
+DATA masks<>+0x0c(SB)/4, $0x00000000
+
+DATA masks<>+0x10(SB)/4, $0x000000ff
+DATA masks<>+0x14(SB)/4, $0x00000000
+DATA masks<>+0x18(SB)/4, $0x00000000
+DATA masks<>+0x1c(SB)/4, $0x00000000
+
+DATA masks<>+0x20(SB)/4, $0x0000ffff
+DATA masks<>+0x24(SB)/4, $0x00000000
+DATA masks<>+0x28(SB)/4, $0x00000000
+DATA masks<>+0x2c(SB)/4, $0x00000000
+
+DATA masks<>+0x30(SB)/4, $0x00ffffff
+DATA masks<>+0x34(SB)/4, $0x00000000
+DATA masks<>+0x38(SB)/4, $0x00000000
+DATA masks<>+0x3c(SB)/4, $0x00000000
+
+DATA masks<>+0x40(SB)/4, $0xffffffff
+DATA masks<>+0x44(SB)/4, $0x00000000
+DATA masks<>+0x48(SB)/4, $0x00000000
+DATA masks<>+0x4c(SB)/4, $0x00000000
+
+DATA masks<>+0x50(SB)/4, $0xffffffff
+DATA masks<>+0x54(SB)/4, $0x000000ff
+DATA masks<>+0x58(SB)/4, $0x00000000
+DATA masks<>+0x5c(SB)/4, $0x00000000
+
+DATA masks<>+0x60(SB)/4, $0xffffffff
+DATA masks<>+0x64(SB)/4, $0x0000ffff
+DATA masks<>+0x68(SB)/4, $0x00000000
+DATA masks<>+0x6c(SB)/4, $0x00000000
+
+DATA masks<>+0x70(SB)/4, $0xffffffff
+DATA masks<>+0x74(SB)/4, $0x00ffffff
+DATA masks<>+0x78(SB)/4, $0x00000000
+DATA masks<>+0x7c(SB)/4, $0x00000000
+
+DATA masks<>+0x80(SB)/4, $0xffffffff
+DATA masks<>+0x84(SB)/4, $0xffffffff
+DATA masks<>+0x88(SB)/4, $0x00000000
+DATA masks<>+0x8c(SB)/4, $0x00000000
+
+DATA masks<>+0x90(SB)/4, $0xffffffff
+DATA masks<>+0x94(SB)/4, $0xffffffff
+DATA masks<>+0x98(SB)/4, $0x000000ff
+DATA masks<>+0x9c(SB)/4, $0x00000000
+
+DATA masks<>+0xa0(SB)/4, $0xffffffff
+DATA masks<>+0xa4(SB)/4, $0xffffffff
+DATA masks<>+0xa8(SB)/4, $0x0000ffff
+DATA masks<>+0xac(SB)/4, $0x00000000
+
+DATA masks<>+0xb0(SB)/4, $0xffffffff
+DATA masks<>+0xb4(SB)/4, $0xffffffff
+DATA masks<>+0xb8(SB)/4, $0x00ffffff
+DATA masks<>+0xbc(SB)/4, $0x00000000
+
+DATA masks<>+0xc0(SB)/4, $0xffffffff
+DATA masks<>+0xc4(SB)/4, $0xffffffff
+DATA masks<>+0xc8(SB)/4, $0xffffffff
+DATA masks<>+0xcc(SB)/4, $0x00000000
+
+DATA masks<>+0xd0(SB)/4, $0xffffffff
+DATA masks<>+0xd4(SB)/4, $0xffffffff
+DATA masks<>+0xd8(SB)/4, $0xffffffff
+DATA masks<>+0xdc(SB)/4, $0x000000ff
+
+DATA masks<>+0xe0(SB)/4, $0xffffffff
+DATA masks<>+0xe4(SB)/4, $0xffffffff
+DATA masks<>+0xe8(SB)/4, $0xffffffff
+DATA masks<>+0xec(SB)/4, $0x0000ffff
+
+DATA masks<>+0xf0(SB)/4, $0xffffffff
+DATA masks<>+0xf4(SB)/4, $0xffffffff
+DATA masks<>+0xf8(SB)/4, $0xffffffff
+DATA masks<>+0xfc(SB)/4, $0x00ffffff
+
+GLOBL masks<>(SB),RODATA,$256
+
+// these are arguments to pshufb. They move data down from
+// the high bytes of the register to the low bytes of the register.
+// index is how many bytes to move.
+DATA shifts<>+0x00(SB)/4, $0x00000000
+DATA shifts<>+0x04(SB)/4, $0x00000000
+DATA shifts<>+0x08(SB)/4, $0x00000000
+DATA shifts<>+0x0c(SB)/4, $0x00000000
+
+DATA shifts<>+0x10(SB)/4, $0xffffff0f
+DATA shifts<>+0x14(SB)/4, $0xffffffff
+DATA shifts<>+0x18(SB)/4, $0xffffffff
+DATA shifts<>+0x1c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x20(SB)/4, $0xffff0f0e
+DATA shifts<>+0x24(SB)/4, $0xffffffff
+DATA shifts<>+0x28(SB)/4, $0xffffffff
+DATA shifts<>+0x2c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
+DATA shifts<>+0x34(SB)/4, $0xffffffff
+DATA shifts<>+0x38(SB)/4, $0xffffffff
+DATA shifts<>+0x3c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
+DATA shifts<>+0x44(SB)/4, $0xffffffff
+DATA shifts<>+0x48(SB)/4, $0xffffffff
+DATA shifts<>+0x4c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
+DATA shifts<>+0x54(SB)/4, $0xffffff0f
+DATA shifts<>+0x58(SB)/4, $0xffffffff
+DATA shifts<>+0x5c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
+DATA shifts<>+0x64(SB)/4, $0xffff0f0e
+DATA shifts<>+0x68(SB)/4, $0xffffffff
+DATA shifts<>+0x6c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
+DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
+DATA shifts<>+0x78(SB)/4, $0xffffffff
+DATA shifts<>+0x7c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x80(SB)/4, $0x0b0a0908
+DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
+DATA shifts<>+0x88(SB)/4, $0xffffffff
+DATA shifts<>+0x8c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x90(SB)/4, $0x0a090807
+DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
+DATA shifts<>+0x98(SB)/4, $0xffffff0f
+DATA shifts<>+0x9c(SB)/4, $0xffffffff
+
+DATA shifts<>+0xa0(SB)/4, $0x09080706
+DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
+DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
+DATA shifts<>+0xac(SB)/4, $0xffffffff
+
+DATA shifts<>+0xb0(SB)/4, $0x08070605
+DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
+DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
+DATA shifts<>+0xbc(SB)/4, $0xffffffff
+
+DATA shifts<>+0xc0(SB)/4, $0x07060504
+DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
+DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
+DATA shifts<>+0xcc(SB)/4, $0xffffffff
+
+DATA shifts<>+0xd0(SB)/4, $0x06050403
+DATA shifts<>+0xd4(SB)/4, $0x0a090807
+DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
+DATA shifts<>+0xdc(SB)/4, $0xffffff0f
+
+DATA shifts<>+0xe0(SB)/4, $0x05040302
+DATA shifts<>+0xe4(SB)/4, $0x09080706
+DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
+DATA shifts<>+0xec(SB)/4, $0xffff0f0e
+
+DATA shifts<>+0xf0(SB)/4, $0x04030201
+DATA shifts<>+0xf4(SB)/4, $0x08070605
+DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
+DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
+
+GLOBL shifts<>(SB),RODATA,$256
+
+TEXT ·checkMasksAndShiftsAlignment(SB),NOSPLIT,$0-1
+	// Returns true iff masks<>(SB) and shifts<>(SB) are both 16-byte aligned.
+	MOVL	$masks<>(SB), AX
+	MOVL	$shifts<>(SB), BX
+	ORL	BX, AX	// a low bit set in either address survives the OR
+	TESTL	$15, AX	// low 4 bits all zero <=> both addresses are multiples of 16
+	SETEQ   ret+0(FP)
+	RET
--- a/src/internal/runtime/maps/memhash_aes.go
+++ b/src/internal/runtime/maps/memhash_aes.go
@ -0,0 +1,23 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || arm64 || 386
+
+package maps
+
+import (
+	"unsafe"
+)
+
+//go:noescape
+func MemHash(p unsafe.Pointer, h, s uintptr) uintptr // hashes s bytes at p with seed h; body is in memhash_{386,amd64,arm64}.s
+
+//go:noescape
+func MemHash32(p unsafe.Pointer, h uintptr) uintptr // hashes the 4 bytes at p with seed h; body is in memhash_{386,amd64,arm64}.s
+
+//go:noescape
+func MemHash64(p unsafe.Pointer, h uintptr) uintptr // hashes the 8 bytes at p with seed h; body is in memhash_{386,amd64,arm64}.s
+
+//go:noescape
+func StrHash(p unsafe.Pointer, h uintptr) uintptr // hashes the string whose header p points to, with seed h; body is in memhash_{386,amd64,arm64}.s
--- a/src/internal/runtime/maps/memhash_align_check.go
+++ b/src/internal/runtime/maps/memhash_align_check.go
@ -0,0 +1,10 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || 386
+
+package maps
+
+// checkMasksAndShiftsAlignment reports whether the masks<> and shifts<> tables are 16-byte aligned; it is a stub for memhash_{386,amd64}.s.
+func checkMasksAndShiftsAlignment() bool
--- a/src/internal/runtime/maps/memhash_align_nocheck.go
+++ b/src/internal/runtime/maps/memhash_align_nocheck.go
@ -0,0 +1,16 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !(amd64 || 386)
+
+package maps
+
+func checkMasksAndShiftsAlignment() bool {
+	// This check is only meaningful on amd64/386, where the AES memhash
+	// implementation depends on the masks<> and shifts<> tables being
+	// 16-byte aligned; every other architecture gets this constant stub.
+	// The intent is that any accidental use here reports failure (false).
+	// NOTE(review): a caller that runs on every architecture and treats false as fatal would always fail here — confirm the call site is amd64/386-only.
+	return false
+}
--- a/src/internal/runtime/maps/memhash_amd64.s
+++ b/src/internal/runtime/maps/memhash_amd64.s
@ -0,0 +1,481 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func MemHash32(p unsafe.Pointer, h uintptr) uintptr
+// ABIInternal for performance.
+TEXT ·MemHash32<ABIInternal>(SB),NOSPLIT,$0-24
+	// AX = ptr to data
+	// BX = seed
+	CMPB	·UseAeshash(SB), $0
+	JEQ	noaes	// UseAeshash is false: take the fallback
+	MOVQ	BX, X0	// X0 = seed
+	PINSRD	$2, (AX), X0	// 4 bytes of data in lane 2, above the 64-bit seed
+	AESENC	·aeskeysched+0(SB), X0	// three AES rounds keyed from aeskeysched
+	AESENC	·aeskeysched+16(SB), X0
+	AESENC	·aeskeysched+32(SB), X0
+	MOVQ	X0, AX	// return X0
+	RET
+noaes:
+	JMP	·memHash32Fallback<ABIInternal>(SB)	// AX and BX still hold p and h
+
+// func MemHash64(p unsafe.Pointer, h uintptr) uintptr
+// ABIInternal for performance.
+TEXT ·MemHash64<ABIInternal>(SB),NOSPLIT,$0-24
+	// AX = ptr to data
+	// BX = seed
+	CMPB	·UseAeshash(SB), $0
+	JEQ	noaes	// UseAeshash is false: take the fallback
+	MOVQ	BX, X0	// X0 = seed
+	PINSRQ	$1, (AX), X0	// 8 bytes of data in the high half, above the 64-bit seed
+	AESENC	·aeskeysched+0(SB), X0	// three AES rounds keyed from aeskeysched
+	AESENC	·aeskeysched+16(SB), X0
+	AESENC	·aeskeysched+32(SB), X0
+	MOVQ	X0, AX	// return X0
+	RET
+noaes:
+	JMP	·memHash64Fallback<ABIInternal>(SB)	// AX and BX still hold p and h
+
+// func MemHash(p unsafe.Pointer, h, s uintptr) uintptr
+// hash function using AES hardware instructions
+TEXT ·MemHash<ABIInternal>(SB),NOSPLIT,$0-32
+	// AX = ptr to data
+	// BX = seed
+	// CX = size
+	CMPB	·UseAeshash(SB), $0
+	JEQ	noaes	// UseAeshash is false: take the fallback
+	JMP	·aeshashbody<>(SB)	// AX/BX/CX already match aeshashbody's data/seed/length inputs
+noaes:
+	JMP	·memHashFallback<ABIInternal>(SB)
+
+// func StrHash(p unsafe.Pointer, h uintptr) uintptr
+TEXT ·StrHash<ABIInternal>(SB),NOSPLIT,$0-24
+	// AX = ptr to string struct
+	// BX = seed
+	CMPB	·UseAeshash(SB), $0
+	JEQ	noaes	// UseAeshash is false: take the fallback
+	MOVQ	8(AX), CX	// length of string
+	MOVQ	(AX), AX	// string data (loaded last because it overwrites the struct pointer)
+	JMP	·aeshashbody<>(SB)	// AX/BX/CX now match aeshashbody's data/seed/length inputs
+noaes:
+	JMP	·strHashFallback<ABIInternal>(SB)
+
+// AX: data
+// BX: hash seed
+// CX: length
+// At return: AX = return value
+TEXT ·aeshashbody<>(SB),NOSPLIT,$0-0
+	// Fill an SSE register with our seeds.
+	MOVQ	BX, X0				// 64 bits of per-table hash seed
+	PINSRW	$4, CX, X0			// 16 bits of length
+	PSHUFHW $0, X0, X0			// repeat length 4 times total
+	MOVO	X0, X1				// save unscrambled seed
+	PXOR	·aeskeysched(SB), X0	// xor in per-process seed
+	AESENC	X0, X0				// scramble seed
+
+	CMPQ	CX, $16
+	JB	aes0to15
+	JE	aes16
+	CMPQ	CX, $32
+	JBE	aes17to32
+	CMPQ	CX, $64
+	JBE	aes33to64
+	CMPQ	CX, $128
+	JBE	aes65to128
+	JMP	aes129plus
+
+aes0to15:
+	TESTQ	CX, CX
+	JE	aes0
+
+	ADDQ	$16, AX
+	TESTW	$0xff0, AX
+	JE	endofpage
+
+	// 16 bytes loaded at this address won't cross
+	// a page boundary, so we can load it directly.
+	MOVOU	-16(AX), X1
+	ADDQ	CX, CX
+	MOVQ	$masks<>(SB), AX
+	PAND	(AX)(CX*8), X1
+final1:
+	PXOR	X0, X1	// xor data with seed
+	AESENC	X1, X1	// scramble combo 3 times
+	AESENC	X1, X1
+	AESENC	X1, X1
+	MOVQ	X1, AX	// return X1
+	RET
+
+endofpage:
+	// address ends in 1111xxxx. Might be up against
+	// a page boundary, so load ending at last byte.
+	// Then shift bytes down using pshufb.
+	MOVOU	-32(AX)(CX*1), X1
+	ADDQ	CX, CX
+	MOVQ	$shifts<>(SB), AX
+	PSHUFB	(AX)(CX*8), X1
+	JMP	final1
+
+aes0:
+	// Return scrambled input seed
+	AESENC	X0, X0
+	MOVQ	X0, AX	// return X0
+	RET
+
+aes16:
+	MOVOU	(AX), X1
+	JMP	final1
+
+aes17to32:
+	// make second starting seed
+	PXOR	·aeskeysched+16(SB), X1
+	AESENC	X1, X1
+
+	// load data to be hashed
+	MOVOU	(AX), X2
+	MOVOU	-16(AX)(CX*1), X3
+
+	// xor with seed
+	PXOR	X0, X2
+	PXOR	X1, X3
+
+	// scramble 3 times
+	AESENC	X2, X2
+	AESENC	X3, X3
+	AESENC	X2, X2
+	AESENC	X3, X3
+	AESENC	X2, X2
+	AESENC	X3, X3
+
+	// combine results
+	PXOR	X3, X2
+	MOVQ	X2, AX	// return X2
+	RET
+
+aes33to64:
+	// make 3 more starting seeds
+	MOVO	X1, X2
+	MOVO	X1, X3
+	PXOR	·aeskeysched+16(SB), X1
+	PXOR	·aeskeysched+32(SB), X2
+	PXOR	·aeskeysched+48(SB), X3
+	AESENC	X1, X1
+	AESENC	X2, X2
+	AESENC	X3, X3
+
+	MOVOU	(AX), X4
+	MOVOU	16(AX), X5
+	MOVOU	-32(AX)(CX*1), X6
+	MOVOU	-16(AX)(CX*1), X7
+
+	PXOR	X0, X4
+	PXOR	X1, X5
+	PXOR	X2, X6
+	PXOR	X3, X7
+
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	PXOR	X6, X4
+	PXOR	X7, X5
+	PXOR	X5, X4
+	MOVQ	X4, AX	// return X4
+	RET
+
+aes65to128:
+	// make 7 more starting seeds
+	MOVO	X1, X2
+	MOVO	X1, X3
+	MOVO	X1, X4
+	MOVO	X1, X5
+	MOVO	X1, X6
+	MOVO	X1, X7
+	PXOR	·aeskeysched+16(SB), X1
+	PXOR	·aeskeysched+32(SB), X2
+	PXOR	·aeskeysched+48(SB), X3
+	PXOR	·aeskeysched+64(SB), X4
+	PXOR	·aeskeysched+80(SB), X5
+	PXOR	·aeskeysched+96(SB), X6
+	PXOR	·aeskeysched+112(SB), X7
+	AESENC	X1, X1
+	AESENC	X2, X2
+	AESENC	X3, X3
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	// load data
+	MOVOU	(AX), X8
+	MOVOU	16(AX), X9
+	MOVOU	32(AX), X10
+	MOVOU	48(AX), X11
+	MOVOU	-64(AX)(CX*1), X12
+	MOVOU	-48(AX)(CX*1), X13
+	MOVOU	-32(AX)(CX*1), X14
+	MOVOU	-16(AX)(CX*1), X15
+
+	// xor with seed
+	PXOR	X0, X8
+	PXOR	X1, X9
+	PXOR	X2, X10
+	PXOR	X3, X11
+	PXOR	X4, X12
+	PXOR	X5, X13
+	PXOR	X6, X14
+	PXOR	X7, X15
+
+	// scramble 3 times
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
+
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
+
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
+
+	// combine results
+	PXOR	X12, X8
+	PXOR	X13, X9
+	PXOR	X14, X10
+	PXOR	X15, X11
+	PXOR	X10, X8
+	PXOR	X11, X9
+	PXOR	X9, X8
+	// X15 must be zero on return
+	PXOR	X15, X15
+	MOVQ	X8, AX	// return X8
+	RET
+
+aes129plus:
+	// make 7 more starting seeds
+	MOVO	X1, X2
+	MOVO	X1, X3
+	MOVO	X1, X4
+	MOVO	X1, X5
+	MOVO	X1, X6
+	MOVO	X1, X7
+	PXOR	·aeskeysched+16(SB), X1
+	PXOR	·aeskeysched+32(SB), X2
+	PXOR	·aeskeysched+48(SB), X3
+	PXOR	·aeskeysched+64(SB), X4
+	PXOR	·aeskeysched+80(SB), X5
+	PXOR	·aeskeysched+96(SB), X6
+	PXOR	·aeskeysched+112(SB), X7
+	AESENC	X1, X1
+	AESENC	X2, X2
+	AESENC	X3, X3
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+
+	// start with last (possibly overlapping) block
+	MOVOU	-128(AX)(CX*1), X8
+	MOVOU	-112(AX)(CX*1), X9
+	MOVOU	-96(AX)(CX*1), X10
+	MOVOU	-80(AX)(CX*1), X11
+	MOVOU	-64(AX)(CX*1), X12
+	MOVOU	-48(AX)(CX*1), X13
+	MOVOU	-32(AX)(CX*1), X14
+	MOVOU	-16(AX)(CX*1), X15
+
+	// xor in seed
+	PXOR	X0, X8
+	PXOR	X1, X9
+	PXOR	X2, X10
+	PXOR	X3, X11
+	PXOR	X4, X12
+	PXOR	X5, X13
+	PXOR	X6, X14
+	PXOR	X7, X15
+
+	// compute number of remaining 128-byte blocks
+	DECQ	CX
+	SHRQ	$7, CX
+
+	PCALIGN $16
+aesloop:
+	// scramble state
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
+
+	// scramble state, xor in a block
+	MOVOU	(AX), X0
+	MOVOU	16(AX), X1
+	MOVOU	32(AX), X2
+	MOVOU	48(AX), X3
+	AESENC	X0, X8
+	AESENC	X1, X9
+	AESENC	X2, X10
+	AESENC	X3, X11
+	MOVOU	64(AX), X4
+	MOVOU	80(AX), X5
+	MOVOU	96(AX), X6
+	MOVOU	112(AX), X7
+	AESENC	X4, X12
+	AESENC	X5, X13
+	AESENC	X6, X14
+	AESENC	X7, X15
+
+	ADDQ	$128, AX
+	DECQ	CX
+	JNE	aesloop
+
+	// 3 more scrambles to finish
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
+
+	PXOR	X12, X8
+	PXOR	X13, X9
+	PXOR	X14, X10
+	PXOR	X15, X11
+	PXOR	X10, X8
+	PXOR	X11, X9
+	PXOR	X9, X8
+	// X15 must be zero on return
+	PXOR	X15, X15
+	MOVQ	X8, AX	// return X8
+	RET
+
+// simple mask to get rid of data in the high part of the register.
+DATA masks<>+0x00(SB)/8, $0x0000000000000000
+DATA masks<>+0x08(SB)/8, $0x0000000000000000
+DATA masks<>+0x10(SB)/8, $0x00000000000000ff
+DATA masks<>+0x18(SB)/8, $0x0000000000000000
+DATA masks<>+0x20(SB)/8, $0x000000000000ffff
+DATA masks<>+0x28(SB)/8, $0x0000000000000000
+DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
+DATA masks<>+0x38(SB)/8, $0x0000000000000000
+DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
+DATA masks<>+0x48(SB)/8, $0x0000000000000000
+DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
+DATA masks<>+0x58(SB)/8, $0x0000000000000000
+DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
+DATA masks<>+0x68(SB)/8, $0x0000000000000000
+DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
+DATA masks<>+0x78(SB)/8, $0x0000000000000000
+DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
+DATA masks<>+0x88(SB)/8, $0x0000000000000000
+DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
+DATA masks<>+0x98(SB)/8, $0x00000000000000ff
+DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
+DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
+DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
+DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
+DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
+DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
+GLOBL masks<>(SB),RODATA,$256
+
+// these are arguments to pshufb. They move data down from
+// the high bytes of the register to the low bytes of the register.
+// index is how many bytes to move.
+DATA shifts<>+0x00(SB)/8, $0x0000000000000000
+DATA shifts<>+0x08(SB)/8, $0x0000000000000000
+DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
+DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
+DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
+DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
+DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
+DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
+DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
+DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
+DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
+DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
+DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
+DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
+DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
+DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
+DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
+DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
+DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
+DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
+DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
+DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
+DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
+DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
+GLOBL shifts<>(SB),RODATA,$256
+
+TEXT ·checkMasksAndShiftsAlignment<ABIInternal>(SB),NOSPLIT,$0-1
+	// Returns true iff masks<>(SB) and shifts<>(SB) are both 16-byte aligned.
+	MOVQ	$masks<>(SB), AX
+	MOVQ	$shifts<>(SB), BX
+	ORQ	BX, AX	// a low bit set in either address survives the OR
+	TESTQ	$15, AX	// low 4 bits all zero <=> both addresses are multiples of 16
+	SETEQ	AX	// the bool result is returned in AX
+	RET
--- a/src/internal/runtime/maps/memhash_arm64.s
+++ b/src/internal/runtime/maps/memhash_arm64.s
@ -0,0 +1,390 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func MemHash32(p unsafe.Pointer, h uintptr) uintptr
+TEXT ·MemHash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
+	MOVB	·UseAeshash(SB), R10
+	CBZ	R10, noaes	// UseAeshash is false: take the fallback
+	MOVD	$·aeskeysched+0(SB), R3	// R3 = address of aeskeysched
+
+	VEOR	V0.B16, V0.B16, V0.B16	// clear the state register
+	VLD1	(R3), [V2.B16]	// V2 = first 16 bytes of aeskeysched
+	VLD1	(R0), V0.S[2]	// 4 bytes of data from (R0)
+	VMOV	R1, V0.D[0]	// seed from R1 in the low 64 bits
+
+	AESE	V2.B16, V0.B16
+	AESMC	V0.B16, V0.B16
+	AESE	V2.B16, V0.B16
+	AESMC	V0.B16, V0.B16
+	AESE	V2.B16, V0.B16
+
+	VMOV	V0.D[0], R0	// return the low 64 bits of the state in R0
+	RET
+noaes:
+	B	·memHash32Fallback<ABIInternal>(SB)	// R0 and R1 are untouched on this path
+
+// func MemHash64(p unsafe.Pointer, h uintptr) uintptr
+TEXT ·MemHash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
+	MOVB	·UseAeshash(SB), R10
+	CBZ	R10, noaes	// UseAeshash is false: take the fallback
+	MOVD	$·aeskeysched+0(SB), R3	// R3 = address of aeskeysched
+
+	VEOR	V0.B16, V0.B16, V0.B16	// clear the state register
+	VLD1	(R3), [V2.B16]	// V2 = first 16 bytes of aeskeysched
+	VLD1	(R0), V0.D[1]	// 8 bytes of data from (R0) in the high half
+	VMOV	R1, V0.D[0]	// seed from R1 in the low half
+
+	AESE	V2.B16, V0.B16
+	AESMC	V0.B16, V0.B16
+	AESE	V2.B16, V0.B16
+	AESMC	V0.B16, V0.B16
+	AESE	V2.B16, V0.B16
+
+	VMOV	V0.D[0], R0	// return the low 64 bits of the state in R0
+	RET
+noaes:
+	B	·memHash64Fallback<ABIInternal>(SB)	// R0 and R1 are untouched on this path
+
+// func MemHash(p unsafe.Pointer, h, size uintptr) uintptr
+TEXT ·MemHash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
+	MOVB	·UseAeshash(SB), R10
+	CBZ	R10, noaes	// UseAeshash is false: take the fallback
+	B	·aeshashbody<>(SB)	// R0/R1/R2 are passed through as aeshashbody's data/seed/length
+noaes:
+	B	·memHashFallback<ABIInternal>(SB)
+
+// func StrHash(p unsafe.Pointer, h uintptr) uintptr
+TEXT ·StrHash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
+	MOVB	·UseAeshash(SB), R10
+	CBZ	R10, noaes	// UseAeshash is false: take the fallback
+	LDP	(R0), (R0, R2)	// string data / length
+	B	·aeshashbody<>(SB)	// R0/R1/R2 now hold aeshashbody's data/seed/length
+noaes:
+	B	·strHashFallback<ABIInternal>(SB)
+
+// R0: data
+// R1: seed data
+// R2: length
+// At return, R0 = return value
+TEXT ·aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
+	VEOR	V30.B16, V30.B16, V30.B16
+	VMOV	R1, V30.D[0]
+	VMOV	R2, V30.D[1] // load length into seed
+
+	MOVD	$·aeskeysched+0(SB), R4
+	VLD1.P	16(R4), [V0.B16]
+	AESE	V30.B16, V0.B16
+	AESMC	V0.B16, V0.B16
+	CMP	$16, R2
+	BLO	aes0to15
+	BEQ	aes16
+	CMP	$32, R2
+	BLS	aes17to32
+	CMP	$64, R2
+	BLS	aes33to64
+	CMP	$128, R2
+	BLS	aes65to128
+	B	aes129plus
+
+aes0to15:
+	CBZ	R2, aes0
+	VEOR	V2.B16, V2.B16, V2.B16
+	TBZ	$3, R2, less_than_8
+	VLD1.P	8(R0), V2.D[0]
+
+less_than_8:
+	TBZ	$2, R2, less_than_4
+	VLD1.P	4(R0), V2.S[2]
+
+less_than_4:
+	TBZ	$1, R2, less_than_2
+	VLD1.P	2(R0), V2.H[6]
+
+less_than_2:
+	TBZ	$0, R2, done
+	VLD1	(R0), V2.B[14]
+done:
+	AESE	V0.B16, V2.B16
+	AESMC	V2.B16, V2.B16
+	AESE	V0.B16, V2.B16
+	AESMC	V2.B16, V2.B16
+	AESE	V0.B16, V2.B16
+	AESMC	V2.B16, V2.B16
+
+	VMOV	V2.D[0], R0
+	RET
+
+aes0:
+	VMOV	V0.D[0], R0
+	RET
+
+aes16:
+	VLD1	(R0), [V2.B16]
+	B	done
+
+aes17to32:
+	// make second seed
+	VLD1	(R4), [V1.B16]
+	AESE	V30.B16, V1.B16
+	AESMC	V1.B16, V1.B16
+	SUB	$16, R2, R10
+	VLD1.P	(R0)(R10), [V2.B16]
+	VLD1	(R0), [V3.B16]
+
+	AESE	V0.B16, V2.B16
+	AESMC	V2.B16, V2.B16
+	AESE	V1.B16, V3.B16
+	AESMC	V3.B16, V3.B16
+
+	AESE	V0.B16, V2.B16
+	AESMC	V2.B16, V2.B16
+	AESE	V1.B16, V3.B16
+	AESMC	V3.B16, V3.B16
+
+	AESE	V0.B16, V2.B16
+	AESE	V1.B16, V3.B16
+
+	VEOR	V3.B16, V2.B16, V2.B16
+
+	VMOV	V2.D[0], R0
+	RET
+
+aes33to64:
+	VLD1	(R4), [V1.B16, V2.B16, V3.B16]
+	AESE	V30.B16, V1.B16
+	AESMC	V1.B16, V1.B16
+	AESE	V30.B16, V2.B16
+	AESMC	V2.B16, V2.B16
+	AESE	V30.B16, V3.B16
+	AESMC	V3.B16, V3.B16
+	SUB	$32, R2, R10
+
+	VLD1.P	(R0)(R10), [V4.B16, V5.B16]
+	VLD1	(R0), [V6.B16, V7.B16]
+
+	AESE	V0.B16, V4.B16
+	AESMC	V4.B16, V4.B16
+	AESE	V1.B16, V5.B16
+	AESMC	V5.B16, V5.B16
+	AESE	V2.B16, V6.B16
+	AESMC	V6.B16, V6.B16
+	AESE	V3.B16, V7.B16
+	AESMC	V7.B16, V7.B16
+
+	AESE	V0.B16, V4.B16
+	AESMC	V4.B16, V4.B16
+	AESE	V1.B16, V5.B16
+	AESMC	V5.B16, V5.B16
+	AESE	V2.B16, V6.B16
+	AESMC	V6.B16, V6.B16
+	AESE	V3.B16, V7.B16
+	AESMC	V7.B16, V7.B16
+
+	AESE	V0.B16, V4.B16
+	AESE	V1.B16, V5.B16
+	AESE	V2.B16, V6.B16
+	AESE	V3.B16, V7.B16
+
+	VEOR	V6.B16, V4.B16, V4.B16
+	VEOR	V7.B16, V5.B16, V5.B16
+	VEOR	V5.B16, V4.B16, V4.B16
+
+	VMOV	V4.D[0], R0
+	RET
+
+aes65to128:
+	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
+	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
+	AESE	V30.B16, V1.B16
+	AESMC	V1.B16, V1.B16
+	AESE	V30.B16, V2.B16
+	AESMC	V2.B16, V2.B16
+	AESE	V30.B16, V3.B16
+	AESMC	V3.B16, V3.B16
+	AESE	V30.B16, V4.B16
+	AESMC	V4.B16, V4.B16
+	AESE	V30.B16, V5.B16
+	AESMC	V5.B16, V5.B16
+	AESE	V30.B16, V6.B16
+	AESMC	V6.B16, V6.B16
+	AESE	V30.B16, V7.B16
+	AESMC	V7.B16, V7.B16
+
+	SUB	$64, R2, R10
+	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
+	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
+	AESE	V0.B16,	 V8.B16
+	AESMC	V8.B16,  V8.B16
+	AESE	V1.B16,	 V9.B16
+	AESMC	V9.B16,  V9.B16
+	AESE	V2.B16, V10.B16
+	AESMC	V10.B16,  V10.B16
+	AESE	V3.B16, V11.B16
+	AESMC	V11.B16,  V11.B16
+	AESE	V4.B16, V12.B16
+	AESMC	V12.B16,  V12.B16
+	AESE	V5.B16, V13.B16
+	AESMC	V13.B16,  V13.B16
+	AESE	V6.B16, V14.B16
+	AESMC	V14.B16,  V14.B16
+	AESE	V7.B16, V15.B16
+	AESMC	V15.B16,  V15.B16
+
+	AESE	V0.B16,	 V8.B16
+	AESMC	V8.B16,  V8.B16
+	AESE	V1.B16,	 V9.B16
+	AESMC	V9.B16,  V9.B16
+	AESE	V2.B16, V10.B16
+	AESMC	V10.B16,  V10.B16
+	AESE	V3.B16, V11.B16
+	AESMC	V11.B16,  V11.B16
+	AESE	V4.B16, V12.B16
+	AESMC	V12.B16,  V12.B16
+	AESE	V5.B16, V13.B16
+	AESMC	V13.B16,  V13.B16
+	AESE	V6.B16, V14.B16
+	AESMC	V14.B16,  V14.B16
+	AESE	V7.B16, V15.B16
+	AESMC	V15.B16,  V15.B16
+
+	AESE	V0.B16,	 V8.B16
+	AESE	V1.B16,	 V9.B16
+	AESE	V2.B16, V10.B16
+	AESE	V3.B16, V11.B16
+	AESE	V4.B16, V12.B16
+	AESE	V5.B16, V13.B16
+	AESE	V6.B16, V14.B16
+	AESE	V7.B16, V15.B16
+
+	VEOR	V12.B16, V8.B16, V8.B16
+	VEOR	V13.B16, V9.B16, V9.B16
+	VEOR	V14.B16, V10.B16, V10.B16
+	VEOR	V15.B16, V11.B16, V11.B16
+	VEOR	V10.B16, V8.B16, V8.B16
+	VEOR	V11.B16, V9.B16, V9.B16
+	VEOR	V9.B16, V8.B16, V8.B16
+
+	VMOV	V8.D[0], R0
+	RET
+
+aes129plus:
+	PRFM (R0), PLDL1KEEP
+	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
+	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
+	AESE	V30.B16, V1.B16
+	AESMC	V1.B16, V1.B16
+	AESE	V30.B16, V2.B16
+	AESMC	V2.B16, V2.B16
+	AESE	V30.B16, V3.B16
+	AESMC	V3.B16, V3.B16
+	AESE	V30.B16, V4.B16
+	AESMC	V4.B16, V4.B16
+	AESE	V30.B16, V5.B16
+	AESMC	V5.B16, V5.B16
+	AESE	V30.B16, V6.B16
+	AESMC	V6.B16, V6.B16
+	AESE	V30.B16, V7.B16
+	AESMC	V7.B16, V7.B16
+	ADD	R0, R2, R10
+	SUB	$128, R10, R10
+	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
+	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
+	SUB	$1, R2, R2
+	LSR	$7, R2, R2
+
+aesloop:
+	AESE	V8.B16,	 V0.B16
+	AESMC	V0.B16,  V0.B16
+	AESE	V9.B16,	 V1.B16
+	AESMC	V1.B16,  V1.B16
+	AESE	V10.B16, V2.B16
+	AESMC	V2.B16,  V2.B16
+	AESE	V11.B16, V3.B16
+	AESMC	V3.B16,  V3.B16
+	AESE	V12.B16, V4.B16
+	AESMC	V4.B16,  V4.B16
+	AESE	V13.B16, V5.B16
+	AESMC	V5.B16,  V5.B16
+	AESE	V14.B16, V6.B16
+	AESMC	V6.B16,  V6.B16
+	AESE	V15.B16, V7.B16
+	AESMC	V7.B16,  V7.B16
+
+	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
+	AESE	V8.B16,	 V0.B16
+	AESMC	V0.B16,  V0.B16
+	AESE	V9.B16,	 V1.B16
+	AESMC	V1.B16,  V1.B16
+	AESE	V10.B16, V2.B16
+	AESMC	V2.B16,  V2.B16
+	AESE	V11.B16, V3.B16
+	AESMC	V3.B16,  V3.B16
+
+	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
+	AESE	V12.B16, V4.B16
+	AESMC	V4.B16,  V4.B16
+	AESE	V13.B16, V5.B16
+	AESMC	V5.B16,  V5.B16
+	AESE	V14.B16, V6.B16
+	AESMC	V6.B16,  V6.B16
+	AESE	V15.B16, V7.B16
+	AESMC	V7.B16,  V7.B16
+	SUB	$1, R2, R2
+	CBNZ	R2, aesloop
+
+	AESE	V8.B16,	 V0.B16
+	AESMC	V0.B16,  V0.B16
+	AESE	V9.B16,	 V1.B16
+	AESMC	V1.B16,  V1.B16
+	AESE	V10.B16, V2.B16
+	AESMC	V2.B16,  V2.B16
+	AESE	V11.B16, V3.B16
+	AESMC	V3.B16,  V3.B16
+	AESE	V12.B16, V4.B16
+	AESMC	V4.B16,  V4.B16
+	AESE	V13.B16, V5.B16
+	AESMC	V5.B16,  V5.B16
+	AESE	V14.B16, V6.B16
+	AESMC	V6.B16,  V6.B16
+	AESE	V15.B16, V7.B16
+	AESMC	V7.B16,  V7.B16
+
+	AESE	V8.B16,	 V0.B16
+	AESMC	V0.B16,  V0.B16
+	AESE	V9.B16,	 V1.B16
+	AESMC	V1.B16,  V1.B16
+	AESE	V10.B16, V2.B16
+	AESMC	V2.B16,  V2.B16
+	AESE	V11.B16, V3.B16
+	AESMC	V3.B16,  V3.B16
+	AESE	V12.B16, V4.B16
+	AESMC	V4.B16,  V4.B16
+	AESE	V13.B16, V5.B16
+	AESMC	V5.B16,  V5.B16
+	AESE	V14.B16, V6.B16
+	AESMC	V6.B16,  V6.B16
+	AESE	V15.B16, V7.B16
+	AESMC	V7.B16,  V7.B16
+
+	AESE	V8.B16,	 V0.B16
+	AESE	V9.B16,	 V1.B16
+	AESE	V10.B16, V2.B16
+	AESE	V11.B16, V3.B16
+	AESE	V12.B16, V4.B16
+	AESE	V13.B16, V5.B16
+	AESE	V14.B16, V6.B16
+	AESE	V15.B16, V7.B16
+
+	VEOR	V0.B16, V1.B16, V0.B16
+	VEOR	V2.B16, V3.B16, V2.B16
+	VEOR	V4.B16, V5.B16, V4.B16
+	VEOR	V6.B16, V7.B16, V6.B16
+	VEOR	V0.B16, V2.B16, V0.B16
+	VEOR	V4.B16, V6.B16, V4.B16
+	VEOR	V4.B16, V0.B16, V0.B16
+
+	VMOV	V0.D[0], R0
+	RET
--- a/src/internal/runtime/maps/memhash_noaes.go
+++ b/src/internal/runtime/maps/memhash_noaes.go
@ -0,0 +1,28 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !(amd64 || arm64 || 386)
+
+package maps
+
+import (
+	"unsafe"
+)
+
+// MemHash hashes the s bytes at p with seed h. AES hashing is not implemented for these architectures, so it always calls memHashFallback.
+func MemHash(p unsafe.Pointer, h, s uintptr) uintptr {
+	return memHashFallback(p, h, s)
+}
+
+func MemHash32(p unsafe.Pointer, h uintptr) uintptr {
+	return memHash32Fallback(p, h)
+}
+
+func MemHash64(p unsafe.Pointer, h uintptr) uintptr {
+	return memHash64Fallback(p, h)
+}
+
+func StrHash(p unsafe.Pointer, h uintptr) uintptr {
+	return strHashFallback(p, h)
+}
--- a/src/internal/runtime/maps/runtime.go
+++ b/src/internal/runtime/maps/runtime.go
@ -16,21 +16,12 @@ import (

 // Functions below pushed from runtime.
 //
-//go:noescape
-//go:linkname memhash32 runtime.memhash32
-func memhash32(p unsafe.Pointer, h uintptr) uintptr
-
-//go:noescape
-//go:linkname memhash64 runtime.memhash64
-func memhash64(p unsafe.Pointer, h uintptr) uintptr
-
-//go:noescape
-//go:linkname strhash runtime.strhash
-func strhash(a unsafe.Pointer, h uintptr) uintptr
-
 //go:linkname fatal
 func fatal(s string)

+//go:linkname bootstrapRand runtime.bootstrapRand
+func bootstrapRand() uint64
+
 //go:linkname rand
 func rand() uint64

--- a/src/internal/runtime/maps/runtime_alg.go
+++ b/src/internal/runtime/maps/runtime_alg.go
@ -0,0 +1,91 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package maps
+
+import (
+	"internal/byteorder"
+	"internal/cpu"
+	"internal/goarch"
+	"unsafe"
+)
+
+// UseAeshash reports whether the processor we're running on
+// actually supports the instructions used by the AES-based
+// hash implementation. It is set by initAlgAES.
+var UseAeshash bool
+
+const hashRandomBytes = goarch.PtrSize / 4 * 64
+
+// used to seed the AES-based hash function; filled with random data by initAlgAES
+var aeskeysched [hashRandomBytes]byte
+
+// used in runtime_hash{32,64}.go to seed the hash function
+var hashkey [4]uintptr
+
+func AlgInit() {
+	// Install AES hash algorithms if the instructions needed are present.
+	if (goarch.GOARCH == "386" || goarch.GOARCH == "amd64") &&
+		cpu.X86.HasAES && // AESENC
+		cpu.X86.HasSSSE3 && // PSHUFB
+		cpu.X86.HasSSE41 { // PINSR{D,Q}
+
+		// In aeshashbody (that is used by memhash & strhash)
+		// we have global variables that should be properly aligned.
+		//
+		// See #12415
+		if !checkMasksAndShiftsAlignment() {
+			fatal("maps: global variables for AES hashing are not properly aligned!")
+		}
+		initAlgAES()
+		return
+	}
+	if goarch.GOARCH == "arm64" && cpu.ARM64.HasAES {
+		initAlgAES()
+		return
+	}
+	for i := range hashkey {
+		hashkey[i] = uintptr(bootstrapRand())
+	}
+}
+
+func initAlgAES() {
+	UseAeshash = true
+	// Initialize with random data so hash collisions will be hard to engineer.
+	key := (*[hashRandomBytes / 8]uint64)(unsafe.Pointer(&aeskeysched))
+	for i := range key {
+		key[i] = bootstrapRand()
+	}
+}
+
+func strHashFallback(a unsafe.Pointer, h uintptr) uintptr {
+	type stringStruct struct {
+		str unsafe.Pointer
+		len int
+	}
+	x := (*stringStruct)(a)
+	return memHashFallback(x.str, h, uintptr(x.len))
+}
+
+//go:nosplit
+func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
+	return unsafe.Pointer(uintptr(p) + x)
+}
+
+// Note: readUnaligned32 and readUnaligned64 perform the read with native endianness.
+func readUnaligned32(p unsafe.Pointer) uint32 {
+	q := (*[4]byte)(p)
+	if goarch.BigEndian {
+		return byteorder.BEUint32(q[:])
+	}
+	return byteorder.LEUint32(q[:])
+}
+
+func readUnaligned64(p unsafe.Pointer) uint64 {
+	q := (*[8]byte)(p)
+	if goarch.BigEndian {
+		return byteorder.BEUint64(q[:])
+	}
+	return byteorder.LEUint64(q[:])
+}
--- a/src/internal/runtime/maps/runtime_fast32.go
+++ b/src/internal/runtime/maps/runtime_fast32.go
@ -75,7 +75,7 @@ func runtime_mapaccess2_fast32(typ *abi.MapType, m *Map, key uint32) (unsafe.Poi
 	// However, from compiler's perspective, key is no longer address-taken and
 	// filled back in register before the loop.
 	k := key
-	hash := memhash32(unsafe.Pointer(&k), m.seed)
+	hash := MemHash32(unsafe.Pointer(&k), m.seed)

 	// Select table.
 	idx := m.directoryIndex(hash)
@ -169,7 +169,7 @@ func runtime_mapassign_fast32(typ *abi.MapType, m *Map, key uint32) unsafe.Point
 	// See the related comment in runtime_mapaccess2_fast32
 	// for why we pass local copy of key.
 	k := key
-	hash := memhash32(unsafe.Pointer(&k), m.seed)
+	hash := MemHash32(unsafe.Pointer(&k), m.seed)

 	// Set writing after calling Hasher, since Hasher may panic, in which
 	// case we have not actually done a write.
@ -311,7 +311,7 @@ func runtime_mapassign_fast32ptr(typ *abi.MapType, m *Map, key unsafe.Pointer) u
 	// See the related comment in runtime_mapaccess2_fast32
 	// for why we pass local copy of key.
 	k := key
-	hash := memhash32(unsafe.Pointer(&k), m.seed)
+	hash := MemHash32(unsafe.Pointer(&k), m.seed)

 	// Set writing after calling Hasher, since Hasher may panic, in which
 	// case we have not actually done a write.
--- a/src/internal/runtime/maps/runtime_fast64.go
+++ b/src/internal/runtime/maps/runtime_fast64.go
@ -66,7 +66,7 @@ func runtime_mapaccess2_fast64(typ *abi.MapType, m *Map, key uint64) (unsafe.Poi
 	// See the related comment in runtime_mapaccess2_fast32
 	// for why we pass local copy of key.
 	k := key
-	hash := memhash64(unsafe.Pointer(&k), m.seed)
+	hash := MemHash64(unsafe.Pointer(&k), m.seed)

 	// Select table.
 	idx := m.directoryIndex(hash)
@ -161,7 +161,7 @@ func runtime_mapassign_fast64(typ *abi.MapType, m *Map, key uint64) unsafe.Point
 	// See the related comment in runtime_mapaccess2_fast32
 	// for why we pass local copy of key.
 	k := key
-	hash := memhash64(unsafe.Pointer(&k), m.seed)
+	hash := MemHash64(unsafe.Pointer(&k), m.seed)

 	// Set writing after calling Hasher, since Hasher may panic, in which
 	// case we have not actually done a write.
@ -341,7 +341,7 @@ func runtime_mapassign_fast64ptr(typ *abi.MapType, m *Map, key unsafe.Pointer) u
 	// See the related comment in runtime_mapaccess2_fast32
 	// for why we pass local copy of key.
 	k := key
-	hash := memhash64(unsafe.Pointer(&k), m.seed)
+	hash := MemHash64(unsafe.Pointer(&k), m.seed)

 	// Set writing after calling Hasher, since Hasher may panic, in which
 	// case we have not actually done a write.
--- a/src/internal/runtime/maps/runtime_faststr.go
+++ b/src/internal/runtime/maps/runtime_faststr.go
@ -68,7 +68,7 @@ dohash:
 	// See the related comment in runtime_mapaccess2_fast32
 	// for why we pass local copy of key.
 	k := key
-	hash := strhash(unsafe.Pointer(&k), m.seed)
+	hash := StrHash(unsafe.Pointer(&k), m.seed)
 	h2 := uint8(h2(hash))
 	ctrls = *g.ctrls()
 	slotKey = g.key(typ, 0)
@ -149,7 +149,7 @@ func runtime_mapaccess2_faststr(typ *abi.MapType, m *Map, key string) (unsafe.Po
 	// See the related comment in runtime_mapaccess2_fast32
 	// for why we pass local copy of key.
 	k := key
-	hash := strhash(unsafe.Pointer(&k), m.seed)
+	hash := StrHash(unsafe.Pointer(&k), m.seed)

 	// Select table.
 	idx := m.directoryIndex(hash)
@ -245,7 +245,7 @@ func runtime_mapassign_faststr(typ *abi.MapType, m *Map, key string) unsafe.Poin
 	// See the related comment in runtime_mapaccess2_fast32
 	// for why we pass local copy of key.
 	k := key
-	hash := strhash(unsafe.Pointer(&k), m.seed)
+	hash := StrHash(unsafe.Pointer(&k), m.seed)

 	// Set writing after calling Hasher, since Hasher may panic, in which
 	// case we have not actually done a write.
--- a/src/internal/runtime/maps/runtime_hash32.go
+++ b/src/internal/runtime/maps/runtime_hash32.go
@ -7,11 +7,11 @@

 //go:build 386 || arm || mips || mipsle || wasm || (gccgo && (ppc || s390))

-package runtime
+package maps

 import "unsafe"

-func memhash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
+func memHash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
 	a, b := mix32(uint32(seed), uint32(4^hashkey[0]))
 	t := readUnaligned32(p)
 	a ^= t
@ -21,7 +21,7 @@ func memhash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
 	return uintptr(a ^ b)
 }

-func memhash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
+func memHash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
 	a, b := mix32(uint32(seed), uint32(8^hashkey[0]))
 	a ^= readUnaligned32(p)
 	b ^= readUnaligned32(add(p, 4))
@ -30,7 +30,7 @@ func memhash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
 	return uintptr(a ^ b)
 }

-func memhashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
+func memHashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {

 	a, b := mix32(uint32(seed), uint32(s^hashkey[0]))
 	if s == 0 {
--- a/src/internal/runtime/maps/runtime_hash64.go
+++ b/src/internal/runtime/maps/runtime_hash64.go
@ -7,7 +7,7 @@

 //go:build amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x

-package runtime
+package maps

 import (
 	"math/bits"
@ -18,7 +18,7 @@ const (
 	m5 = 0x1d8e4e27c47d124f
 )

-func memhashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
+func memHashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
 	var a, b uintptr
 	seed ^= hashkey[0]
 	switch {
@ -64,12 +64,12 @@ func memhashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
 	return mix(m5^s, mix(a^hashkey[1], b^seed))
 }

-func memhash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
+func memHash32Fallback(p unsafe.Pointer, seed uintptr) uintptr {
 	a := r4(p)
 	return mix(m5^4, mix(a^hashkey[1], a^seed^hashkey[0]))
 }

-func memhash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
+func memHash64Fallback(p unsafe.Pointer, seed uintptr) uintptr {
 	a := r8(p)
 	return mix(m5^8, mix(a^hashkey[1], a^seed^hashkey[0]))
 }
--- a/src/runtime/alg.go
+++ b/src/runtime/alg.go
@ -7,8 +7,8 @@ package runtime
 import (
 	"internal/abi"
 	"internal/byteorder"
-	"internal/cpu"
 	"internal/goarch"
+	"internal/runtime/maps"
 	"internal/runtime/sys"
 	"unsafe"
 )
@ -54,12 +54,11 @@ func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr {
 	return memhash(p, h, size)
 }

-// runtime variable to check if the processor we're running on
-// actually supports the instructions used by the AES-based
-// hash implementation.
-var useAeshash bool
-
-// in asm_*.s
+// The functions below are simple wrappers.
+// It's better to use the maps.MemHash functions directly,
+// but we have reflection code that still calls hashing from runtime via LookupRuntime,
+// so we have to try to minimize the overhead of the extra call.
+// To that end, add nosplit for performance.

 // memhash should be an internal detail,
 // but widely used packages access it using linkname.
@ -77,16 +76,21 @@ var useAeshash bool
 // Do not remove or change the type signature.
 // See go.dev/issue/67401.
 //
+//go:nosplit
 //go:linkname memhash
-func memhash(p unsafe.Pointer, h, s uintptr) uintptr
+func memhash(p unsafe.Pointer, h, s uintptr) uintptr {
+	return maps.MemHash(p, h, s)
+}

-// Accessed in internal/runtime/maps.
-//
-//go:linknamestd memhash32
-func memhash32(p unsafe.Pointer, h uintptr) uintptr
+//go:nosplit
+func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+	return maps.MemHash64(p, seed)
+}

-//go:linknamestd memhash64
-func memhash64(p unsafe.Pointer, h uintptr) uintptr
+//go:nosplit
+func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+	return maps.MemHash32(p, seed)
+}

 // strhash should be an internal detail,
 // but widely used packages access it using linkname.
@ -101,11 +105,8 @@ func memhash64(p unsafe.Pointer, h uintptr) uintptr
 // See go.dev/issue/67401.
 //
 //go:linkname strhash
-func strhash(p unsafe.Pointer, h uintptr) uintptr
-
-func strhashFallback(a unsafe.Pointer, h uintptr) uintptr {
-	x := (*stringStruct)(a)
-	return memhashFallback(x.str, h, uintptr(x.len))
+func strhash(p unsafe.Pointer, h uintptr) uintptr {
+	return maps.StrHash(p, h)
 }

 // NOTE: Because NaN != NaN, a map can contain any
@ -383,50 +384,6 @@ func ifaceHash(i interface {
 	return interhash(noescape(unsafe.Pointer(&i)), seed)
 }

-const hashRandomBytes = goarch.PtrSize / 4 * 64
-
-// used in asm_{386,amd64,arm64}.s to seed the hash function
-var aeskeysched [hashRandomBytes]byte
-
-// used in hash{32,64}.go to seed the hash function
-var hashkey [4]uintptr
-
-func alginit() {
-	// Install AES hash algorithms if the instructions needed are present.
-	if (GOARCH == "386" || GOARCH == "amd64") &&
-		cpu.X86.HasAES && // AESENC
-		cpu.X86.HasSSSE3 && // PSHUFB
-		cpu.X86.HasSSE41 { // PINSR{D,Q}
-		initAlgAES()
-		return
-	}
-	if GOARCH == "arm64" && cpu.ARM64.HasAES {
-		initAlgAES()
-		return
-	}
-	for i := range hashkey {
-		hashkey[i] = uintptr(bootstrapRand())
-	}
-}
-
-func initAlgAES() {
-	useAeshash = true
-	// Initialize with random data so hash collisions will be hard to engineer.
-	key := (*[hashRandomBytes / 8]uint64)(unsafe.Pointer(&aeskeysched))
-	for i := range key {
-		key[i] = bootstrapRand()
-	}
-}
-
-// Note: These routines perform the read with a native endianness.
-func readUnaligned32(p unsafe.Pointer) uint32 {
-	q := (*[4]byte)(p)
-	if goarch.BigEndian {
-		return byteorder.BEUint32(q[:])
-	}
-	return byteorder.LEUint32(q[:])
-}
-
 func readUnaligned64(p unsafe.Pointer) uint64 {
 	q := (*[8]byte)(p)
 	if goarch.BigEndian {
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@ -923,432 +923,6 @@ TEXT ldt0setup<>(SB),NOSPLIT,$16-0
 TEXT runtime·emptyfunc(SB),0,$0-0
 	RET

-// hash function using AES hardware instructions
-TEXT runtime·memhash(SB),NOSPLIT,$0-16
-	CMPB	runtime·useAeshash(SB), $0
-	JEQ	noaes
-	MOVL	p+0(FP), AX	// ptr to data
-	MOVL	s+8(FP), BX	// size
-	LEAL	ret+12(FP), DX
-	JMP	runtime·aeshashbody<>(SB)
-noaes:
-	JMP	runtime·memhashFallback(SB)
-
-TEXT runtime·strhash(SB),NOSPLIT,$0-12
-	CMPB	runtime·useAeshash(SB), $0
-	JEQ	noaes
-	MOVL	p+0(FP), AX	// ptr to string object
-	MOVL	4(AX), BX	// length of string
-	MOVL	(AX), AX	// string data
-	LEAL	ret+8(FP), DX
-	JMP	runtime·aeshashbody<>(SB)
-noaes:
-	JMP	runtime·strhashFallback(SB)
-
-// AX: data
-// BX: length
-// DX: address to put return value
-TEXT runtime·aeshashbody<>(SB),NOSPLIT,$0-0
-	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
-	PINSRW	$4, BX, X0	            // 16 bits of length
-	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
-	MOVO	X0, X1                      // save unscrambled seed
-	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
-	AESENC	X0, X0                      // scramble seed
-
-	CMPL	BX, $16
-	JB	aes0to15
-	JE	aes16
-	CMPL	BX, $32
-	JBE	aes17to32
-	CMPL	BX, $64
-	JBE	aes33to64
-	JMP	aes65plus
-
-aes0to15:
-	TESTL	BX, BX
-	JE	aes0
-
-	ADDL	$16, AX
-	TESTW	$0xff0, AX
-	JE	endofpage
-
-	// 16 bytes loaded at this address won't cross
-	// a page boundary, so we can load it directly.
-	MOVOU	-16(AX), X1
-	ADDL	BX, BX
-	PAND	masks<>(SB)(BX*8), X1
-
-final1:
-	PXOR	X0, X1	// xor data with seed
-	AESENC	X1, X1  // scramble combo 3 times
-	AESENC	X1, X1
-	AESENC	X1, X1
-	MOVL	X1, (DX)
-	RET
-
-endofpage:
-	// address ends in 1111xxxx. Might be up against
-	// a page boundary, so load ending at last byte.
-	// Then shift bytes down using pshufb.
-	MOVOU	-32(AX)(BX*1), X1
-	ADDL	BX, BX
-	PSHUFB	shifts<>(SB)(BX*8), X1
-	JMP	final1
-
-aes0:
-	// Return scrambled input seed
-	AESENC	X0, X0
-	MOVL	X0, (DX)
-	RET
-
-aes16:
-	MOVOU	(AX), X1
-	JMP	final1
-
-aes17to32:
-	// make second starting seed
-	PXOR	runtime·aeskeysched+16(SB), X1
-	AESENC	X1, X1
-
-	// load data to be hashed
-	MOVOU	(AX), X2
-	MOVOU	-16(AX)(BX*1), X3
-
-	// xor with seed
-	PXOR	X0, X2
-	PXOR	X1, X3
-
-	// scramble 3 times
-	AESENC	X2, X2
-	AESENC	X3, X3
-	AESENC	X2, X2
-	AESENC	X3, X3
-	AESENC	X2, X2
-	AESENC	X3, X3
-
-	// combine results
-	PXOR	X3, X2
-	MOVL	X2, (DX)
-	RET
-
-aes33to64:
-	// make 3 more starting seeds
-	MOVO	X1, X2
-	MOVO	X1, X3
-	PXOR	runtime·aeskeysched+16(SB), X1
-	PXOR	runtime·aeskeysched+32(SB), X2
-	PXOR	runtime·aeskeysched+48(SB), X3
-	AESENC	X1, X1
-	AESENC	X2, X2
-	AESENC	X3, X3
-
-	MOVOU	(AX), X4
-	MOVOU	16(AX), X5
-	MOVOU	-32(AX)(BX*1), X6
-	MOVOU	-16(AX)(BX*1), X7
-
-	PXOR	X0, X4
-	PXOR	X1, X5
-	PXOR	X2, X6
-	PXOR	X3, X7
-
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	PXOR	X6, X4
-	PXOR	X7, X5
-	PXOR	X5, X4
-	MOVL	X4, (DX)
-	RET
-
-aes65plus:
-	// make 3 more starting seeds
-	MOVO	X1, X2
-	MOVO	X1, X3
-	PXOR	runtime·aeskeysched+16(SB), X1
-	PXOR	runtime·aeskeysched+32(SB), X2
-	PXOR	runtime·aeskeysched+48(SB), X3
-	AESENC	X1, X1
-	AESENC	X2, X2
-	AESENC	X3, X3
-
-	// start with last (possibly overlapping) block
-	MOVOU	-64(AX)(BX*1), X4
-	MOVOU	-48(AX)(BX*1), X5
-	MOVOU	-32(AX)(BX*1), X6
-	MOVOU	-16(AX)(BX*1), X7
-
-	// scramble state once
-	AESENC	X0, X4
-	AESENC	X1, X5
-	AESENC	X2, X6
-	AESENC	X3, X7
-
-	// compute number of remaining 64-byte blocks
-	DECL	BX
-	SHRL	$6, BX
-
-aesloop:
-	// scramble state, xor in a block
-	MOVOU	(AX), X0
-	MOVOU	16(AX), X1
-	MOVOU	32(AX), X2
-	MOVOU	48(AX), X3
-	AESENC	X0, X4
-	AESENC	X1, X5
-	AESENC	X2, X6
-	AESENC	X3, X7
-
-	// scramble state
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	ADDL	$64, AX
-	DECL	BX
-	JNE	aesloop
-
-	// 3 more scrambles to finish
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	PXOR	X6, X4
-	PXOR	X7, X5
-	PXOR	X5, X4
-	MOVL	X4, (DX)
-	RET
-
-TEXT runtime·memhash32(SB),NOSPLIT,$0-12
-	CMPB	runtime·useAeshash(SB), $0
-	JEQ	noaes
-	MOVL	p+0(FP), AX	// ptr to data
-	MOVL	h+4(FP), X0	// seed
-	PINSRD	$1, (AX), X0	// data
-	AESENC	runtime·aeskeysched+0(SB), X0
-	AESENC	runtime·aeskeysched+16(SB), X0
-	AESENC	runtime·aeskeysched+32(SB), X0
-	MOVL	X0, ret+8(FP)
-	RET
-noaes:
-	JMP	runtime·memhash32Fallback(SB)
-
-TEXT runtime·memhash64(SB),NOSPLIT,$0-12
-	CMPB	runtime·useAeshash(SB), $0
-	JEQ	noaes
-	MOVL	p+0(FP), AX	// ptr to data
-	MOVQ	(AX), X0	// data
-	PINSRD	$2, h+4(FP), X0	// seed
-	AESENC	runtime·aeskeysched+0(SB), X0
-	AESENC	runtime·aeskeysched+16(SB), X0
-	AESENC	runtime·aeskeysched+32(SB), X0
-	MOVL	X0, ret+8(FP)
-	RET
-noaes:
-	JMP	runtime·memhash64Fallback(SB)
-
-// simple mask to get rid of data in the high part of the register.
-DATA masks<>+0x00(SB)/4, $0x00000000
-DATA masks<>+0x04(SB)/4, $0x00000000
-DATA masks<>+0x08(SB)/4, $0x00000000
-DATA masks<>+0x0c(SB)/4, $0x00000000
-
-DATA masks<>+0x10(SB)/4, $0x000000ff
-DATA masks<>+0x14(SB)/4, $0x00000000
-DATA masks<>+0x18(SB)/4, $0x00000000
-DATA masks<>+0x1c(SB)/4, $0x00000000
-
-DATA masks<>+0x20(SB)/4, $0x0000ffff
-DATA masks<>+0x24(SB)/4, $0x00000000
-DATA masks<>+0x28(SB)/4, $0x00000000
-DATA masks<>+0x2c(SB)/4, $0x00000000
-
-DATA masks<>+0x30(SB)/4, $0x00ffffff
-DATA masks<>+0x34(SB)/4, $0x00000000
-DATA masks<>+0x38(SB)/4, $0x00000000
-DATA masks<>+0x3c(SB)/4, $0x00000000
-
-DATA masks<>+0x40(SB)/4, $0xffffffff
-DATA masks<>+0x44(SB)/4, $0x00000000
-DATA masks<>+0x48(SB)/4, $0x00000000
-DATA masks<>+0x4c(SB)/4, $0x00000000
-
-DATA masks<>+0x50(SB)/4, $0xffffffff
-DATA masks<>+0x54(SB)/4, $0x000000ff
-DATA masks<>+0x58(SB)/4, $0x00000000
-DATA masks<>+0x5c(SB)/4, $0x00000000
-
-DATA masks<>+0x60(SB)/4, $0xffffffff
-DATA masks<>+0x64(SB)/4, $0x0000ffff
-DATA masks<>+0x68(SB)/4, $0x00000000
-DATA masks<>+0x6c(SB)/4, $0x00000000
-
-DATA masks<>+0x70(SB)/4, $0xffffffff
-DATA masks<>+0x74(SB)/4, $0x00ffffff
-DATA masks<>+0x78(SB)/4, $0x00000000
-DATA masks<>+0x7c(SB)/4, $0x00000000
-
-DATA masks<>+0x80(SB)/4, $0xffffffff
-DATA masks<>+0x84(SB)/4, $0xffffffff
-DATA masks<>+0x88(SB)/4, $0x00000000
-DATA masks<>+0x8c(SB)/4, $0x00000000
-
-DATA masks<>+0x90(SB)/4, $0xffffffff
-DATA masks<>+0x94(SB)/4, $0xffffffff
-DATA masks<>+0x98(SB)/4, $0x000000ff
-DATA masks<>+0x9c(SB)/4, $0x00000000
-
-DATA masks<>+0xa0(SB)/4, $0xffffffff
-DATA masks<>+0xa4(SB)/4, $0xffffffff
-DATA masks<>+0xa8(SB)/4, $0x0000ffff
-DATA masks<>+0xac(SB)/4, $0x00000000
-
-DATA masks<>+0xb0(SB)/4, $0xffffffff
-DATA masks<>+0xb4(SB)/4, $0xffffffff
-DATA masks<>+0xb8(SB)/4, $0x00ffffff
-DATA masks<>+0xbc(SB)/4, $0x00000000
-
-DATA masks<>+0xc0(SB)/4, $0xffffffff
-DATA masks<>+0xc4(SB)/4, $0xffffffff
-DATA masks<>+0xc8(SB)/4, $0xffffffff
-DATA masks<>+0xcc(SB)/4, $0x00000000
-
-DATA masks<>+0xd0(SB)/4, $0xffffffff
-DATA masks<>+0xd4(SB)/4, $0xffffffff
-DATA masks<>+0xd8(SB)/4, $0xffffffff
-DATA masks<>+0xdc(SB)/4, $0x000000ff
-
-DATA masks<>+0xe0(SB)/4, $0xffffffff
-DATA masks<>+0xe4(SB)/4, $0xffffffff
-DATA masks<>+0xe8(SB)/4, $0xffffffff
-DATA masks<>+0xec(SB)/4, $0x0000ffff
-
-DATA masks<>+0xf0(SB)/4, $0xffffffff
-DATA masks<>+0xf4(SB)/4, $0xffffffff
-DATA masks<>+0xf8(SB)/4, $0xffffffff
-DATA masks<>+0xfc(SB)/4, $0x00ffffff
-
-GLOBL masks<>(SB),RODATA,$256
-
-// these are arguments to pshufb. They move data down from
-// the high bytes of the register to the low bytes of the register.
-// index is how many bytes to move.
-DATA shifts<>+0x00(SB)/4, $0x00000000
-DATA shifts<>+0x04(SB)/4, $0x00000000
-DATA shifts<>+0x08(SB)/4, $0x00000000
-DATA shifts<>+0x0c(SB)/4, $0x00000000
-
-DATA shifts<>+0x10(SB)/4, $0xffffff0f
-DATA shifts<>+0x14(SB)/4, $0xffffffff
-DATA shifts<>+0x18(SB)/4, $0xffffffff
-DATA shifts<>+0x1c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x20(SB)/4, $0xffff0f0e
-DATA shifts<>+0x24(SB)/4, $0xffffffff
-DATA shifts<>+0x28(SB)/4, $0xffffffff
-DATA shifts<>+0x2c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
-DATA shifts<>+0x34(SB)/4, $0xffffffff
-DATA shifts<>+0x38(SB)/4, $0xffffffff
-DATA shifts<>+0x3c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
-DATA shifts<>+0x44(SB)/4, $0xffffffff
-DATA shifts<>+0x48(SB)/4, $0xffffffff
-DATA shifts<>+0x4c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
-DATA shifts<>+0x54(SB)/4, $0xffffff0f
-DATA shifts<>+0x58(SB)/4, $0xffffffff
-DATA shifts<>+0x5c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
-DATA shifts<>+0x64(SB)/4, $0xffff0f0e
-DATA shifts<>+0x68(SB)/4, $0xffffffff
-DATA shifts<>+0x6c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
-DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
-DATA shifts<>+0x78(SB)/4, $0xffffffff
-DATA shifts<>+0x7c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x80(SB)/4, $0x0b0a0908
-DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
-DATA shifts<>+0x88(SB)/4, $0xffffffff
-DATA shifts<>+0x8c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x90(SB)/4, $0x0a090807
-DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
-DATA shifts<>+0x98(SB)/4, $0xffffff0f
-DATA shifts<>+0x9c(SB)/4, $0xffffffff
-
-DATA shifts<>+0xa0(SB)/4, $0x09080706
-DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
-DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
-DATA shifts<>+0xac(SB)/4, $0xffffffff
-
-DATA shifts<>+0xb0(SB)/4, $0x08070605
-DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
-DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
-DATA shifts<>+0xbc(SB)/4, $0xffffffff
-
-DATA shifts<>+0xc0(SB)/4, $0x07060504
-DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
-DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
-DATA shifts<>+0xcc(SB)/4, $0xffffffff
-
-DATA shifts<>+0xd0(SB)/4, $0x06050403
-DATA shifts<>+0xd4(SB)/4, $0x0a090807
-DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
-DATA shifts<>+0xdc(SB)/4, $0xffffff0f
-
-DATA shifts<>+0xe0(SB)/4, $0x05040302
-DATA shifts<>+0xe4(SB)/4, $0x09080706
-DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
-DATA shifts<>+0xec(SB)/4, $0xffff0f0e
-
-DATA shifts<>+0xf0(SB)/4, $0x04030201
-DATA shifts<>+0xf4(SB)/4, $0x08070605
-DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
-DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
-
-GLOBL shifts<>(SB),RODATA,$256
-
-TEXT ·checkASM(SB),NOSPLIT,$0-1
-	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
-	MOVL	$masks<>(SB), AX
-	MOVL	$shifts<>(SB), BX
-	ORL	BX, AX
-	TESTL	$15, AX
-	SETEQ	ret+0(FP)
-	RET
-
 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
 // Must obey the gcc calling convention.
 TEXT _cgo_topofstack(SB),NOSPLIT,$0
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@ -1243,483 +1243,6 @@ fences:
 	RDTSC
 	JMP done

-// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
-// hash function using AES hardware instructions
-TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
-	// AX = ptr to data
-	// BX = seed
-	// CX = size
-	CMPB	runtime·useAeshash(SB), $0
-	JEQ	noaes
-	JMP	runtime·aeshashbody<>(SB)
-noaes:
-	JMP	runtime·memhashFallback<ABIInternal>(SB)
-
-// func strhash(p unsafe.Pointer, h uintptr) uintptr
-TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
-	// AX = ptr to string struct
-	// BX = seed
-	CMPB	runtime·useAeshash(SB), $0
-	JEQ	noaes
-	MOVQ	8(AX), CX	// length of string
-	MOVQ	(AX), AX	// string data
-	JMP	runtime·aeshashbody<>(SB)
-noaes:
-	JMP	runtime·strhashFallback<ABIInternal>(SB)
-
-// AX: data
-// BX: hash seed
-// CX: length
-// At return: AX = return value
-TEXT runtime·aeshashbody<>(SB),NOSPLIT,$0-0
-	// Fill an SSE register with our seeds.
-	MOVQ	BX, X0				// 64 bits of per-table hash seed
-	PINSRW	$4, CX, X0			// 16 bits of length
-	PSHUFHW $0, X0, X0			// repeat length 4 times total
-	MOVO	X0, X1				// save unscrambled seed
-	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
-	AESENC	X0, X0				// scramble seed
-
-	CMPQ	CX, $16
-	JB	aes0to15
-	JE	aes16
-	CMPQ	CX, $32
-	JBE	aes17to32
-	CMPQ	CX, $64
-	JBE	aes33to64
-	CMPQ	CX, $128
-	JBE	aes65to128
-	JMP	aes129plus
-
-aes0to15:
-	TESTQ	CX, CX
-	JE	aes0
-
-	ADDQ	$16, AX
-	TESTW	$0xff0, AX
-	JE	endofpage
-
-	// 16 bytes loaded at this address won't cross
-	// a page boundary, so we can load it directly.
-	MOVOU	-16(AX), X1
-	ADDQ	CX, CX
-	MOVQ	$masks<>(SB), AX
-	PAND	(AX)(CX*8), X1
-final1:
-	PXOR	X0, X1	// xor data with seed
-	AESENC	X1, X1	// scramble combo 3 times
-	AESENC	X1, X1
-	AESENC	X1, X1
-	MOVQ	X1, AX	// return X1
-	RET
-
-endofpage:
-	// address ends in 1111xxxx. Might be up against
-	// a page boundary, so load ending at last byte.
-	// Then shift bytes down using pshufb.
-	MOVOU	-32(AX)(CX*1), X1
-	ADDQ	CX, CX
-	MOVQ	$shifts<>(SB), AX
-	PSHUFB	(AX)(CX*8), X1
-	JMP	final1
-
-aes0:
-	// Return scrambled input seed
-	AESENC	X0, X0
-	MOVQ	X0, AX	// return X0
-	RET
-
-aes16:
-	MOVOU	(AX), X1
-	JMP	final1
-
-aes17to32:
-	// make second starting seed
-	PXOR	runtime·aeskeysched+16(SB), X1
-	AESENC	X1, X1
-
-	// load data to be hashed
-	MOVOU	(AX), X2
-	MOVOU	-16(AX)(CX*1), X3
-
-	// xor with seed
-	PXOR	X0, X2
-	PXOR	X1, X3
-
-	// scramble 3 times
-	AESENC	X2, X2
-	AESENC	X3, X3
-	AESENC	X2, X2
-	AESENC	X3, X3
-	AESENC	X2, X2
-	AESENC	X3, X3
-
-	// combine results
-	PXOR	X3, X2
-	MOVQ	X2, AX	// return X2
-	RET
-
-aes33to64:
-	// make 3 more starting seeds
-	MOVO	X1, X2
-	MOVO	X1, X3
-	PXOR	runtime·aeskeysched+16(SB), X1
-	PXOR	runtime·aeskeysched+32(SB), X2
-	PXOR	runtime·aeskeysched+48(SB), X3
-	AESENC	X1, X1
-	AESENC	X2, X2
-	AESENC	X3, X3
-
-	MOVOU	(AX), X4
-	MOVOU	16(AX), X5
-	MOVOU	-32(AX)(CX*1), X6
-	MOVOU	-16(AX)(CX*1), X7
-
-	PXOR	X0, X4
-	PXOR	X1, X5
-	PXOR	X2, X6
-	PXOR	X3, X7
-
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	PXOR	X6, X4
-	PXOR	X7, X5
-	PXOR	X5, X4
-	MOVQ	X4, AX	// return X4
-	RET
-
-aes65to128:
-	// make 7 more starting seeds
-	MOVO	X1, X2
-	MOVO	X1, X3
-	MOVO	X1, X4
-	MOVO	X1, X5
-	MOVO	X1, X6
-	MOVO	X1, X7
-	PXOR	runtime·aeskeysched+16(SB), X1
-	PXOR	runtime·aeskeysched+32(SB), X2
-	PXOR	runtime·aeskeysched+48(SB), X3
-	PXOR	runtime·aeskeysched+64(SB), X4
-	PXOR	runtime·aeskeysched+80(SB), X5
-	PXOR	runtime·aeskeysched+96(SB), X6
-	PXOR	runtime·aeskeysched+112(SB), X7
-	AESENC	X1, X1
-	AESENC	X2, X2
-	AESENC	X3, X3
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	// load data
-	MOVOU	(AX), X8
-	MOVOU	16(AX), X9
-	MOVOU	32(AX), X10
-	MOVOU	48(AX), X11
-	MOVOU	-64(AX)(CX*1), X12
-	MOVOU	-48(AX)(CX*1), X13
-	MOVOU	-32(AX)(CX*1), X14
-	MOVOU	-16(AX)(CX*1), X15
-
-	// xor with seed
-	PXOR	X0, X8
-	PXOR	X1, X9
-	PXOR	X2, X10
-	PXOR	X3, X11
-	PXOR	X4, X12
-	PXOR	X5, X13
-	PXOR	X6, X14
-	PXOR	X7, X15
-
-	// scramble 3 times
-	AESENC	X8, X8
-	AESENC	X9, X9
-	AESENC	X10, X10
-	AESENC	X11, X11
-	AESENC	X12, X12
-	AESENC	X13, X13
-	AESENC	X14, X14
-	AESENC	X15, X15
-
-	AESENC	X8, X8
-	AESENC	X9, X9
-	AESENC	X10, X10
-	AESENC	X11, X11
-	AESENC	X12, X12
-	AESENC	X13, X13
-	AESENC	X14, X14
-	AESENC	X15, X15
-
-	AESENC	X8, X8
-	AESENC	X9, X9
-	AESENC	X10, X10
-	AESENC	X11, X11
-	AESENC	X12, X12
-	AESENC	X13, X13
-	AESENC	X14, X14
-	AESENC	X15, X15
-
-	// combine results
-	PXOR	X12, X8
-	PXOR	X13, X9
-	PXOR	X14, X10
-	PXOR	X15, X11
-	PXOR	X10, X8
-	PXOR	X11, X9
-	PXOR	X9, X8
-	// X15 must be zero on return
-	PXOR	X15, X15
-	MOVQ	X8, AX	// return X8
-	RET
-
-aes129plus:
-	// make 7 more starting seeds
-	MOVO	X1, X2
-	MOVO	X1, X3
-	MOVO	X1, X4
-	MOVO	X1, X5
-	MOVO	X1, X6
-	MOVO	X1, X7
-	PXOR	runtime·aeskeysched+16(SB), X1
-	PXOR	runtime·aeskeysched+32(SB), X2
-	PXOR	runtime·aeskeysched+48(SB), X3
-	PXOR	runtime·aeskeysched+64(SB), X4
-	PXOR	runtime·aeskeysched+80(SB), X5
-	PXOR	runtime·aeskeysched+96(SB), X6
-	PXOR	runtime·aeskeysched+112(SB), X7
-	AESENC	X1, X1
-	AESENC	X2, X2
-	AESENC	X3, X3
-	AESENC	X4, X4
-	AESENC	X5, X5
-	AESENC	X6, X6
-	AESENC	X7, X7
-
-	// start with last (possibly overlapping) block
-	MOVOU	-128(AX)(CX*1), X8
-	MOVOU	-112(AX)(CX*1), X9
-	MOVOU	-96(AX)(CX*1), X10
-	MOVOU	-80(AX)(CX*1), X11
-	MOVOU	-64(AX)(CX*1), X12
-	MOVOU	-48(AX)(CX*1), X13
-	MOVOU	-32(AX)(CX*1), X14
-	MOVOU	-16(AX)(CX*1), X15
-
-	// xor in seed
-	PXOR	X0, X8
-	PXOR	X1, X9
-	PXOR	X2, X10
-	PXOR	X3, X11
-	PXOR	X4, X12
-	PXOR	X5, X13
-	PXOR	X6, X14
-	PXOR	X7, X15
-
-	// compute number of remaining 128-byte blocks
-	DECQ	CX
-	SHRQ	$7, CX
-
-	PCALIGN $16
-aesloop:
-	// scramble state
-	AESENC	X8, X8
-	AESENC	X9, X9
-	AESENC	X10, X10
-	AESENC	X11, X11
-	AESENC	X12, X12
-	AESENC	X13, X13
-	AESENC	X14, X14
-	AESENC	X15, X15
-
-	// scramble state, xor in a block
-	MOVOU	(AX), X0
-	MOVOU	16(AX), X1
-	MOVOU	32(AX), X2
-	MOVOU	48(AX), X3
-	AESENC	X0, X8
-	AESENC	X1, X9
-	AESENC	X2, X10
-	AESENC	X3, X11
-	MOVOU	64(AX), X4
-	MOVOU	80(AX), X5
-	MOVOU	96(AX), X6
-	MOVOU	112(AX), X7
-	AESENC	X4, X12
-	AESENC	X5, X13
-	AESENC	X6, X14
-	AESENC	X7, X15
-
-	ADDQ	$128, AX
-	DECQ	CX
-	JNE	aesloop
-
-	// 3 more scrambles to finish
-	AESENC	X8, X8
-	AESENC	X9, X9
-	AESENC	X10, X10
-	AESENC	X11, X11
-	AESENC	X12, X12
-	AESENC	X13, X13
-	AESENC	X14, X14
-	AESENC	X15, X15
-	AESENC	X8, X8
-	AESENC	X9, X9
-	AESENC	X10, X10
-	AESENC	X11, X11
-	AESENC	X12, X12
-	AESENC	X13, X13
-	AESENC	X14, X14
-	AESENC	X15, X15
-	AESENC	X8, X8
-	AESENC	X9, X9
-	AESENC	X10, X10
-	AESENC	X11, X11
-	AESENC	X12, X12
-	AESENC	X13, X13
-	AESENC	X14, X14
-	AESENC	X15, X15
-
-	PXOR	X12, X8
-	PXOR	X13, X9
-	PXOR	X14, X10
-	PXOR	X15, X11
-	PXOR	X10, X8
-	PXOR	X11, X9
-	PXOR	X9, X8
-	// X15 must be zero on return
-	PXOR	X15, X15
-	MOVQ	X8, AX	// return X8
-	RET
-
-// func memhash32(p unsafe.Pointer, h uintptr) uintptr
-// ABIInternal for performance.
-TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
-	// AX = ptr to data
-	// BX = seed
-	CMPB	runtime·useAeshash(SB), $0
-	JEQ	noaes
-	MOVQ	BX, X0	// X0 = seed
-	PINSRD	$2, (AX), X0	// data
-	AESENC	runtime·aeskeysched+0(SB), X0
-	AESENC	runtime·aeskeysched+16(SB), X0
-	AESENC	runtime·aeskeysched+32(SB), X0
-	MOVQ	X0, AX	// return X0
-	RET
-noaes:
-	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
-
-// func memhash64(p unsafe.Pointer, h uintptr) uintptr
-// ABIInternal for performance.
-TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
-	// AX = ptr to data
-	// BX = seed
-	CMPB	runtime·useAeshash(SB), $0
-	JEQ	noaes
-	MOVQ	BX, X0	// X0 = seed
-	PINSRQ	$1, (AX), X0	// data
-	AESENC	runtime·aeskeysched+0(SB), X0
-	AESENC	runtime·aeskeysched+16(SB), X0
-	AESENC	runtime·aeskeysched+32(SB), X0
-	MOVQ	X0, AX	// return X0
-	RET
-noaes:
-	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
-
-// simple mask to get rid of data in the high part of the register.
-DATA masks<>+0x00(SB)/8, $0x0000000000000000
-DATA masks<>+0x08(SB)/8, $0x0000000000000000
-DATA masks<>+0x10(SB)/8, $0x00000000000000ff
-DATA masks<>+0x18(SB)/8, $0x0000000000000000
-DATA masks<>+0x20(SB)/8, $0x000000000000ffff
-DATA masks<>+0x28(SB)/8, $0x0000000000000000
-DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
-DATA masks<>+0x38(SB)/8, $0x0000000000000000
-DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
-DATA masks<>+0x48(SB)/8, $0x0000000000000000
-DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
-DATA masks<>+0x58(SB)/8, $0x0000000000000000
-DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
-DATA masks<>+0x68(SB)/8, $0x0000000000000000
-DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
-DATA masks<>+0x78(SB)/8, $0x0000000000000000
-DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
-DATA masks<>+0x88(SB)/8, $0x0000000000000000
-DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
-DATA masks<>+0x98(SB)/8, $0x00000000000000ff
-DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
-DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
-DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
-DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
-DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
-DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
-DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
-DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
-DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
-DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
-DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
-DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
-GLOBL masks<>(SB),RODATA,$256
-
-// func checkASM() bool
-TEXT ·checkASM(SB),NOSPLIT,$0-1
-	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
-	MOVQ	$masks<>(SB), AX
-	MOVQ	$shifts<>(SB), BX
-	ORQ	BX, AX
-	TESTQ	$15, AX
-	SETEQ	ret+0(FP)
-	RET
-
-// these are arguments to pshufb. They move data down from
-// the high bytes of the register to the low bytes of the register.
-// index is how many bytes to move.
-DATA shifts<>+0x00(SB)/8, $0x0000000000000000
-DATA shifts<>+0x08(SB)/8, $0x0000000000000000
-DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
-DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
-DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
-DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
-DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
-DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
-DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
-DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
-DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
-DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
-DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
-DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
-DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
-DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
-DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
-DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
-DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
-DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
-DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
-DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
-DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
-DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
-DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
-DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
-DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
-DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
-DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
-DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
-DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
-DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
-GLOBL shifts<>(SB),RODATA,$256
-
 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
 // Must obey the gcc calling convention.
 TEXT _cgo_topofstack(SB),NOSPLIT,$0
--- a/src/runtime/asm_arm.s
+++ b/src/runtime/asm_arm.s
@ -809,16 +809,6 @@ TEXT runtime·armPublicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
 	DMB	MB_ST
 	RET

-// AES hashing not implemented for ARM
-TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-16
-	JMP	runtime·memhashFallback(SB)
-TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-12
-	JMP	runtime·strhashFallback(SB)
-TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-12
-	JMP	runtime·memhash32Fallback(SB)
-TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-12
-	JMP	runtime·memhash64Fallback(SB)
-
 TEXT runtime·procyieldAsm(SB),NOSPLIT|NOFRAME,$0
 	MOVW	cycles+0(FP), R1
 	MOVW	$0, R0
@ -887,11 +877,6 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
 	MOVW	saver9-4(SP), R9
 	RET

-TEXT ·checkASM(SB),NOSPLIT,$0-1
-	MOVW	$1, R3
-	MOVB	R3, ret+0(FP)
-	RET
-
 // gcWriteBarrier informs the GC about heap pointer writes.
 //
 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
--- a/src/runtime/asm_arm64.s
+++ b/src/runtime/asm_arm64.s
@ -674,391 +674,6 @@ CALLFN(·call268435456, 268435456)
 CALLFN(·call536870912, 536870912)
 CALLFN(·call1073741824, 1073741824)

-// func memhash32(p unsafe.Pointer, h uintptr) uintptr
-TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	MOVB	runtime·useAeshash(SB), R10
-	CBZ	R10, noaes
-	MOVD	$runtime·aeskeysched+0(SB), R3
-
-	VEOR	V0.B16, V0.B16, V0.B16
-	VLD1	(R3), [V2.B16]
-	VLD1	(R0), V0.S[2]
-	VMOV	R1, V0.D[0]
-
-	AESE	V2.B16, V0.B16
-	AESMC	V0.B16, V0.B16
-	AESE	V2.B16, V0.B16
-	AESMC	V0.B16, V0.B16
-	AESE	V2.B16, V0.B16
-
-	VMOV	V0.D[0], R0
-	RET
-noaes:
-	B	runtime·memhash32Fallback<ABIInternal>(SB)
-
-// func memhash64(p unsafe.Pointer, h uintptr) uintptr
-TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	MOVB	runtime·useAeshash(SB), R10
-	CBZ	R10, noaes
-	MOVD	$runtime·aeskeysched+0(SB), R3
-
-	VEOR	V0.B16, V0.B16, V0.B16
-	VLD1	(R3), [V2.B16]
-	VLD1	(R0), V0.D[1]
-	VMOV	R1, V0.D[0]
-
-	AESE	V2.B16, V0.B16
-	AESMC	V0.B16, V0.B16
-	AESE	V2.B16, V0.B16
-	AESMC	V0.B16, V0.B16
-	AESE	V2.B16, V0.B16
-
-	VMOV	V0.D[0], R0
-	RET
-noaes:
-	B	runtime·memhash64Fallback<ABIInternal>(SB)
-
-// func memhash(p unsafe.Pointer, h, size uintptr) uintptr
-TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
-	MOVB	runtime·useAeshash(SB), R10
-	CBZ	R10, noaes
-	B	runtime·aeshashbody<>(SB)
-noaes:
-	B	runtime·memhashFallback<ABIInternal>(SB)
-
-// func strhash(p unsafe.Pointer, h uintptr) uintptr
-TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	MOVB	runtime·useAeshash(SB), R10
-	CBZ	R10, noaes
-	LDP	(R0), (R0, R2)	// string data / length
-	B	runtime·aeshashbody<>(SB)
-noaes:
-	B	runtime·strhashFallback<ABIInternal>(SB)
-
-// R0: data
-// R1: seed data
-// R2: length
-// At return, R0 = return value
-TEXT runtime·aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
-	VEOR	V30.B16, V30.B16, V30.B16
-	VMOV	R1, V30.D[0]
-	VMOV	R2, V30.D[1] // load length into seed
-
-	MOVD	$runtime·aeskeysched+0(SB), R4
-	VLD1.P	16(R4), [V0.B16]
-	AESE	V30.B16, V0.B16
-	AESMC	V0.B16, V0.B16
-	CMP	$16, R2
-	BLO	aes0to15
-	BEQ	aes16
-	CMP	$32, R2
-	BLS	aes17to32
-	CMP	$64, R2
-	BLS	aes33to64
-	CMP	$128, R2
-	BLS	aes65to128
-	B	aes129plus
-
-aes0to15:
-	CBZ	R2, aes0
-	VEOR	V2.B16, V2.B16, V2.B16
-	TBZ	$3, R2, less_than_8
-	VLD1.P	8(R0), V2.D[0]
-
-less_than_8:
-	TBZ	$2, R2, less_than_4
-	VLD1.P	4(R0), V2.S[2]
-
-less_than_4:
-	TBZ	$1, R2, less_than_2
-	VLD1.P	2(R0), V2.H[6]
-
-less_than_2:
-	TBZ	$0, R2, done
-	VLD1	(R0), V2.B[14]
-done:
-	AESE	V0.B16, V2.B16
-	AESMC	V2.B16, V2.B16
-	AESE	V0.B16, V2.B16
-	AESMC	V2.B16, V2.B16
-	AESE	V0.B16, V2.B16
-	AESMC	V2.B16, V2.B16
-
-	VMOV	V2.D[0], R0
-	RET
-
-aes0:
-	VMOV	V0.D[0], R0
-	RET
-
-aes16:
-	VLD1	(R0), [V2.B16]
-	B	done
-
-aes17to32:
-	// make second seed
-	VLD1	(R4), [V1.B16]
-	AESE	V30.B16, V1.B16
-	AESMC	V1.B16, V1.B16
-	SUB	$16, R2, R10
-	VLD1.P	(R0)(R10), [V2.B16]
-	VLD1	(R0), [V3.B16]
-
-	AESE	V0.B16, V2.B16
-	AESMC	V2.B16, V2.B16
-	AESE	V1.B16, V3.B16
-	AESMC	V3.B16, V3.B16
-
-	AESE	V0.B16, V2.B16
-	AESMC	V2.B16, V2.B16
-	AESE	V1.B16, V3.B16
-	AESMC	V3.B16, V3.B16
-
-	AESE	V0.B16, V2.B16
-	AESE	V1.B16, V3.B16
-
-	VEOR	V3.B16, V2.B16, V2.B16
-
-	VMOV	V2.D[0], R0
-	RET
-
-aes33to64:
-	VLD1	(R4), [V1.B16, V2.B16, V3.B16]
-	AESE	V30.B16, V1.B16
-	AESMC	V1.B16, V1.B16
-	AESE	V30.B16, V2.B16
-	AESMC	V2.B16, V2.B16
-	AESE	V30.B16, V3.B16
-	AESMC	V3.B16, V3.B16
-	SUB	$32, R2, R10
-
-	VLD1.P	(R0)(R10), [V4.B16, V5.B16]
-	VLD1	(R0), [V6.B16, V7.B16]
-
-	AESE	V0.B16, V4.B16
-	AESMC	V4.B16, V4.B16
-	AESE	V1.B16, V5.B16
-	AESMC	V5.B16, V5.B16
-	AESE	V2.B16, V6.B16
-	AESMC	V6.B16, V6.B16
-	AESE	V3.B16, V7.B16
-	AESMC	V7.B16, V7.B16
-
-	AESE	V0.B16, V4.B16
-	AESMC	V4.B16, V4.B16
-	AESE	V1.B16, V5.B16
-	AESMC	V5.B16, V5.B16
-	AESE	V2.B16, V6.B16
-	AESMC	V6.B16, V6.B16
-	AESE	V3.B16, V7.B16
-	AESMC	V7.B16, V7.B16
-
-	AESE	V0.B16, V4.B16
-	AESE	V1.B16, V5.B16
-	AESE	V2.B16, V6.B16
-	AESE	V3.B16, V7.B16
-
-	VEOR	V6.B16, V4.B16, V4.B16
-	VEOR	V7.B16, V5.B16, V5.B16
-	VEOR	V5.B16, V4.B16, V4.B16
-
-	VMOV	V4.D[0], R0
-	RET
-
-aes65to128:
-	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
-	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
-	AESE	V30.B16, V1.B16
-	AESMC	V1.B16, V1.B16
-	AESE	V30.B16, V2.B16
-	AESMC	V2.B16, V2.B16
-	AESE	V30.B16, V3.B16
-	AESMC	V3.B16, V3.B16
-	AESE	V30.B16, V4.B16
-	AESMC	V4.B16, V4.B16
-	AESE	V30.B16, V5.B16
-	AESMC	V5.B16, V5.B16
-	AESE	V30.B16, V6.B16
-	AESMC	V6.B16, V6.B16
-	AESE	V30.B16, V7.B16
-	AESMC	V7.B16, V7.B16
-
-	SUB	$64, R2, R10
-	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
-	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
-	AESE	V0.B16,	 V8.B16
-	AESMC	V8.B16,  V8.B16
-	AESE	V1.B16,	 V9.B16
-	AESMC	V9.B16,  V9.B16
-	AESE	V2.B16, V10.B16
-	AESMC	V10.B16,  V10.B16
-	AESE	V3.B16, V11.B16
-	AESMC	V11.B16,  V11.B16
-	AESE	V4.B16, V12.B16
-	AESMC	V12.B16,  V12.B16
-	AESE	V5.B16, V13.B16
-	AESMC	V13.B16,  V13.B16
-	AESE	V6.B16, V14.B16
-	AESMC	V14.B16,  V14.B16
-	AESE	V7.B16, V15.B16
-	AESMC	V15.B16,  V15.B16
-
-	AESE	V0.B16,	 V8.B16
-	AESMC	V8.B16,  V8.B16
-	AESE	V1.B16,	 V9.B16
-	AESMC	V9.B16,  V9.B16
-	AESE	V2.B16, V10.B16
-	AESMC	V10.B16,  V10.B16
-	AESE	V3.B16, V11.B16
-	AESMC	V11.B16,  V11.B16
-	AESE	V4.B16, V12.B16
-	AESMC	V12.B16,  V12.B16
-	AESE	V5.B16, V13.B16
-	AESMC	V13.B16,  V13.B16
-	AESE	V6.B16, V14.B16
-	AESMC	V14.B16,  V14.B16
-	AESE	V7.B16, V15.B16
-	AESMC	V15.B16,  V15.B16
-
-	AESE	V0.B16,	 V8.B16
-	AESE	V1.B16,	 V9.B16
-	AESE	V2.B16, V10.B16
-	AESE	V3.B16, V11.B16
-	AESE	V4.B16, V12.B16
-	AESE	V5.B16, V13.B16
-	AESE	V6.B16, V14.B16
-	AESE	V7.B16, V15.B16
-
-	VEOR	V12.B16, V8.B16, V8.B16
-	VEOR	V13.B16, V9.B16, V9.B16
-	VEOR	V14.B16, V10.B16, V10.B16
-	VEOR	V15.B16, V11.B16, V11.B16
-	VEOR	V10.B16, V8.B16, V8.B16
-	VEOR	V11.B16, V9.B16, V9.B16
-	VEOR	V9.B16, V8.B16, V8.B16
-
-	VMOV	V8.D[0], R0
-	RET
-
-aes129plus:
-	PRFM (R0), PLDL1KEEP
-	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
-	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
-	AESE	V30.B16, V1.B16
-	AESMC	V1.B16, V1.B16
-	AESE	V30.B16, V2.B16
-	AESMC	V2.B16, V2.B16
-	AESE	V30.B16, V3.B16
-	AESMC	V3.B16, V3.B16
-	AESE	V30.B16, V4.B16
-	AESMC	V4.B16, V4.B16
-	AESE	V30.B16, V5.B16
-	AESMC	V5.B16, V5.B16
-	AESE	V30.B16, V6.B16
-	AESMC	V6.B16, V6.B16
-	AESE	V30.B16, V7.B16
-	AESMC	V7.B16, V7.B16
-	ADD	R0, R2, R10
-	SUB	$128, R10, R10
-	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
-	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
-	SUB	$1, R2, R2
-	LSR	$7, R2, R2
-
-aesloop:
-	AESE	V8.B16,	 V0.B16
-	AESMC	V0.B16,  V0.B16
-	AESE	V9.B16,	 V1.B16
-	AESMC	V1.B16,  V1.B16
-	AESE	V10.B16, V2.B16
-	AESMC	V2.B16,  V2.B16
-	AESE	V11.B16, V3.B16
-	AESMC	V3.B16,  V3.B16
-	AESE	V12.B16, V4.B16
-	AESMC	V4.B16,  V4.B16
-	AESE	V13.B16, V5.B16
-	AESMC	V5.B16,  V5.B16
-	AESE	V14.B16, V6.B16
-	AESMC	V6.B16,  V6.B16
-	AESE	V15.B16, V7.B16
-	AESMC	V7.B16,  V7.B16
-
-	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
-	AESE	V8.B16,	 V0.B16
-	AESMC	V0.B16,  V0.B16
-	AESE	V9.B16,	 V1.B16
-	AESMC	V1.B16,  V1.B16
-	AESE	V10.B16, V2.B16
-	AESMC	V2.B16,  V2.B16
-	AESE	V11.B16, V3.B16
-	AESMC	V3.B16,  V3.B16
-
-	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
-	AESE	V12.B16, V4.B16
-	AESMC	V4.B16,  V4.B16
-	AESE	V13.B16, V5.B16
-	AESMC	V5.B16,  V5.B16
-	AESE	V14.B16, V6.B16
-	AESMC	V6.B16,  V6.B16
-	AESE	V15.B16, V7.B16
-	AESMC	V7.B16,  V7.B16
-	SUB	$1, R2, R2
-	CBNZ	R2, aesloop
-
-	AESE	V8.B16,	 V0.B16
-	AESMC	V0.B16,  V0.B16
-	AESE	V9.B16,	 V1.B16
-	AESMC	V1.B16,  V1.B16
-	AESE	V10.B16, V2.B16
-	AESMC	V2.B16,  V2.B16
-	AESE	V11.B16, V3.B16
-	AESMC	V3.B16,  V3.B16
-	AESE	V12.B16, V4.B16
-	AESMC	V4.B16,  V4.B16
-	AESE	V13.B16, V5.B16
-	AESMC	V5.B16,  V5.B16
-	AESE	V14.B16, V6.B16
-	AESMC	V6.B16,  V6.B16
-	AESE	V15.B16, V7.B16
-	AESMC	V7.B16,  V7.B16
-
-	AESE	V8.B16,	 V0.B16
-	AESMC	V0.B16,  V0.B16
-	AESE	V9.B16,	 V1.B16
-	AESMC	V1.B16,  V1.B16
-	AESE	V10.B16, V2.B16
-	AESMC	V2.B16,  V2.B16
-	AESE	V11.B16, V3.B16
-	AESMC	V3.B16,  V3.B16
-	AESE	V12.B16, V4.B16
-	AESMC	V4.B16,  V4.B16
-	AESE	V13.B16, V5.B16
-	AESMC	V5.B16,  V5.B16
-	AESE	V14.B16, V6.B16
-	AESMC	V6.B16,  V6.B16
-	AESE	V15.B16, V7.B16
-	AESMC	V7.B16,  V7.B16
-
-	AESE	V8.B16,	 V0.B16
-	AESE	V9.B16,	 V1.B16
-	AESE	V10.B16, V2.B16
-	AESE	V11.B16, V3.B16
-	AESE	V12.B16, V4.B16
-	AESE	V13.B16, V5.B16
-	AESE	V14.B16, V6.B16
-	AESE	V15.B16, V7.B16
-
-	VEOR	V0.B16, V1.B16, V0.B16
-	VEOR	V2.B16, V3.B16, V2.B16
-	VEOR	V4.B16, V5.B16, V4.B16
-	VEOR	V6.B16, V7.B16, V6.B16
-	VEOR	V0.B16, V2.B16, V0.B16
-	VEOR	V4.B16, V6.B16, V4.B16
-	VEOR	V4.B16, V0.B16, V0.B16
-
-	VMOV	V0.D[0], R0
-	RET
-
 // The Arm architecture provides a user space accessible counter-timer which
 // is incremented at a fixed but machine-specific rate. Software can (spin)
 // wait until the counter-timer reaches some desired value.
@ -1435,11 +1050,6 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
 	ADD	$0x10, RSP
 	RET

-TEXT ·checkASM(SB),NOSPLIT,$0-1
-	MOVW	$1, R3
-	MOVB	R3, ret+0(FP)
-	RET
-
 // gcWriteBarrier informs the GC about heap pointer writes.
 //
 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
--- a/src/runtime/asm_loong64.s
+++ b/src/runtime/asm_loong64.s
@ -725,16 +725,6 @@ TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R0
 	UNDEF

-// AES hashing not implemented for loong64
-TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
-	JMP	runtime·memhashFallback<ABIInternal>(SB)
-TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·strhashFallback<ABIInternal>(SB)
-TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
-TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
-
 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
 // Must obey the gcc calling convention.
 TEXT _cgo_topofstack(SB),NOSPLIT,$16
@ -771,11 +761,6 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
 	ADDV	$0x10, R3
 	RET

-TEXT ·checkASM(SB),NOSPLIT,$0-1
-	MOVW	$1, R19
-	MOVB	R19, ret+0(FP)
-	RET
-
 // spillArgs stores return values from registers to a *internal/abi.RegArgs in R25.
 TEXT ·spillArgs(SB),NOSPLIT,$0-0
 	MOVV	R4, (0*8)(R25)
--- a/src/runtime/asm_mips64x.s
+++ b/src/runtime/asm_mips64x.s
@ -646,16 +646,6 @@ TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R0
 	UNDEF

-// AES hashing not implemented for mips64
-TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32
-	JMP	runtime·memhashFallback(SB)
-TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·strhashFallback(SB)
-TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash32Fallback(SB)
-TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash64Fallback(SB)
-
 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
 // Must obey the gcc calling convention.
 TEXT _cgo_topofstack(SB),NOSPLIT,$16
@ -681,11 +671,6 @@ TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
 	// traceback from goexit1 must hit code range of goexit
 	NOR	R0, R0	// NOP

-TEXT ·checkASM(SB),NOSPLIT,$0-1
-	MOVW	$1, R1
-	MOVB	R1, ret+0(FP)
-	RET
-
 // gcWriteBarrier informs the GC about heap pointer writes.
 //
 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
--- a/src/runtime/asm_mipsx.s
+++ b/src/runtime/asm_mipsx.s
@ -621,16 +621,6 @@ TEXT setg_gcc<>(SB),NOSPLIT,$0
 TEXT runtime·abort(SB),NOSPLIT,$0-0
 	UNDEF

-// AES hashing not implemented for mips
-TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-16
-	JMP	runtime·memhashFallback(SB)
-TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-12
-	JMP	runtime·strhashFallback(SB)
-TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-12
-	JMP	runtime·memhash32Fallback(SB)
-TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-12
-	JMP	runtime·memhash64Fallback(SB)
-
 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
 // Must obey the gcc calling convention.
 TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
@ -659,11 +649,6 @@ TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
 	// traceback from goexit1 must hit code range of goexit
 	NOR	R0, R0	// NOP

-TEXT ·checkASM(SB),NOSPLIT,$0-1
-	MOVW	$1, R1
-	MOVB	R1, ret+0(FP)
-	RET
-
 // gcWriteBarrier informs the GC about heap pointer writes.
 //
 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
--- a/src/runtime/asm_ppc64x.s
+++ b/src/runtime/asm_ppc64x.s
@ -1019,16 +1019,6 @@ TEXT runtime·unspillArgs(SB),NOSPLIT,$0-0
 	FMOVD	184(R20), F12
 	RET

-// AES hashing not implemented for ppc64
-TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
-	JMP	runtime·memhashFallback<ABIInternal>(SB)
-TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·strhashFallback<ABIInternal>(SB)
-TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
-TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
-
 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
 // Must obey the gcc calling convention.
 #ifdef GOOS_aix
@ -1088,11 +1078,6 @@ TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
 	ADD	$8, R1
 	RET

-TEXT ·checkASM(SB),NOSPLIT,$0-1
-	MOVW	$1, R3
-	MOVB	R3, ret+0(FP)
-	RET
-
 // gcWriteBarrier informs the GC about heap pointer writes.
 //
 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
--- a/src/runtime/asm_riscv64.s
+++ b/src/runtime/asm_riscv64.s
@ -274,16 +274,6 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
 	MOV	ZERO, CTXT
 	JMP	runtime·morestack(SB)

-// AES hashing not implemented for riscv64
-TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
-	JMP	runtime·memhashFallback<ABIInternal>(SB)
-TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·strhashFallback<ABIInternal>(SB)
-TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
-TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
-
 // restore state from Gobuf; longjmp

 // func gogo(buf *gobuf)
@ -739,11 +729,6 @@ TEXT runtime·setg(SB), NOSPLIT, $0-8
 	CALL	runtime·save_g(SB)
 	RET

-TEXT ·checkASM(SB),NOSPLIT,$0-1
-	MOV	$1, T0
-	MOV	T0, ret+0(FP)
-	RET
-
 // spillArgs stores return values from registers to a *internal/abi.RegArgs in X25.
 TEXT ·spillArgs(SB),NOSPLIT,$0-0
 	MOV	X10, (0*8)(X25)
--- a/src/runtime/asm_s390x.s
+++ b/src/runtime/asm_s390x.s
@ -842,16 +842,6 @@ TEXT runtime·unspillArgs(SB),NOSPLIT,$0-0
 	FMOVD	184(R10), F15
 	RET

-// AES hashing not implemented for s390x
-TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
-	JMP	runtime·memhashFallback<ABIInternal>(SB)
-TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·strhashFallback<ABIInternal>(SB)
-TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
-TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
-
 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
 // Must obey the gcc calling convention.
 TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
@ -904,10 +894,6 @@ TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
 	LMG	48(R15), R6, R15
 	RET

-TEXT ·checkASM(SB),NOSPLIT,$0-1
-	MOVB	$1, ret+0(FP)
-	RET
-
 // gcWriteBarrier informs the GC about heap pointer writes.
 //
 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
--- a/src/runtime/asm_wasm.s
+++ b/src/runtime/asm_wasm.s
@ -32,11 +32,6 @@ TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
 DATA  runtime·mainPC+0(SB)/8,$runtime·main(SB)
 GLOBL runtime·mainPC(SB),RODATA,$8

-// func checkASM() bool
-TEXT ·checkASM(SB), NOSPLIT, $0-1
-	MOVB $1, ret+0(FP)
-	RET
-
 TEXT runtime·gogo(SB), NOSPLIT, $0-8
 	MOVD buf+0(FP), R0
 	MOVD gobuf_g(R0), R1
@ -183,16 +178,6 @@ TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
 TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
 	UNDEF

-// AES hashing not implemented for wasm
-TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32
-	JMP	runtime·memhashFallback(SB)
-TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·strhashFallback(SB)
-TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash32Fallback(SB)
-TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24
-	JMP	runtime·memhash64Fallback(SB)
-
 TEXT runtime·asminit(SB), NOSPLIT, $0-0
 	// No per-thread init.
 	RET
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@ -12,6 +12,7 @@ import (
 	"internal/goos"
 	"internal/runtime/atomic"
 	"internal/runtime/gc"
+	"internal/runtime/maps"
 	"internal/runtime/sys"
 	"unsafe"
 )
@ -211,7 +212,7 @@ var (
 	IfaceHash  = ifaceHash
 )

-var UseAeshash = &useAeshash
+var UseAeshash = &maps.UseAeshash

 func MemclrBytes(b []byte) {
 	s := (*slice)(unsafe.Pointer(&b))
@ -254,7 +255,6 @@ func SetTracebackEnv(level string) {
 	traceback_env = traceback_cache
 }

-var ReadUnaligned32 = readUnaligned32
 var ReadUnaligned64 = readUnaligned64

 func CountPagesInUse() (pagesInUse, counted uintptr) {
--- a/src/runtime/map_test.go
+++ b/src/runtime/map_test.go
@ -1077,7 +1077,7 @@ func TestMemHashGlobalSeed(t *testing.T) {

 	testenv.MustHaveExec(t)

-	// aeshash and memhashFallback use separate per-process seeds, so test
+	// aeshash and memHashFallback use separate per-process seeds, so test
 	// both.
 	t.Run("aes", func(t *testing.T) {
 		if !*runtime.UseAeshash {
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@ -12,6 +12,7 @@ import (
 	"internal/goos"
 	"internal/runtime/atomic"
 	"internal/runtime/exithook"
+	"internal/runtime/maps"
 	"internal/runtime/sys"
 	"internal/strconv"
 	"internal/stringslite"
@ -878,10 +879,10 @@ func schedinit() {
 	ticks.init() // run as early as possible
 	moduledataverify()
 	stackinit()
-	randinit() // must run before mallocinit, alginit, mcommoninit
+	randinit() // must run before mallocinit, AlgInit, mcommoninit
 	mallocinit()
-	cpuinit(godebug) // must run before alginit
-	alginit()        // maps, hash, rand must not be used before this call
+	cpuinit(godebug) // must run before AlgInit
+	maps.AlgInit()   // maps, hash, rand must not be used before this call
 	mcommoninit(gp.m, -1)
 	modulesinit()   // provides activeModules
 	typelinksinit() // uses maps, activeModules
--- a/src/runtime/rand.go
+++ b/src/runtime/rand.go
@ -117,7 +117,10 @@ func allZero(b []byte) bool {
 	return acc == 0
 }

 // bootstrapRand returns a random uint64 from the global random generator.
+// It is used by internal/runtime/maps.
+//
+//go:linknamestd bootstrapRand
 func bootstrapRand() uint64 {
 	lock(&globalRand.lock)
 	if !globalRand.init {
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@ -287,10 +287,6 @@ func check() {
 	if fixedStack != round2(fixedStack) {
 		throw("FixedStack is not power-of-2")
 	}
-
-	if !checkASM() {
-		throw("assembly checks failed")
-	}
 }

 type dbgVar struct {
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go
@ -395,9 +395,6 @@ func divRoundUp(n, a uintptr) uintptr {
 	return (n + a - 1) / a
 }

-// checkASM reports whether assembly runtime checks have passed.
-func checkASM() bool
-
 func memequal_varlen(a, b unsafe.Pointer) bool

 // bool2int returns 0 if x is false or 1 if x is true.