diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go
index 38ccfaac8c6..2c17300ae44 100644
--- a/src/simd/ops_amd64.go
+++ b/src/simd/ops_amd64.go
@@ -18,7 +18,7 @@ func (x Int8x32) Absolute() Int8x32
 
 // Absolute computes the absolute value of each element.
 //
-// Asm: VPABSB, CPU Feature: AVX512EVEX
+// Asm: VPABSB, CPU Feature: AVX512BW
 func (x Int8x64) Absolute() Int8x64
 
 // Absolute computes the absolute value of each element.
@@ -33,7 +33,7 @@ func (x Int16x16) Absolute() Int16x16
 
 // Absolute computes the absolute value of each element.
 //
-// Asm: VPABSW, CPU Feature: AVX512EVEX
+// Asm: VPABSW, CPU Feature: AVX512BW
 func (x Int16x32) Absolute() Int16x32
 
 // Absolute computes the absolute value of each element.
@@ -48,84 +48,84 @@ func (x Int32x8) Absolute() Int32x8
 
 // Absolute computes the absolute value of each element.
 //
-// Asm: VPABSD, CPU Feature: AVX512EVEX
+// Asm: VPABSD, CPU Feature: AVX512F
 func (x Int32x16) Absolute() Int32x16
 
 // Absolute computes the absolute value of each element.
 //
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
+// Asm: VPABSQ, CPU Feature: AVX512F
 func (x Int64x2) Absolute() Int64x2
 
 // Absolute computes the absolute value of each element.
 //
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
+// Asm: VPABSQ, CPU Feature: AVX512F
 func (x Int64x4) Absolute() Int64x4
 
 // Absolute computes the absolute value of each element.
 //
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
+// Asm: VPABSQ, CPU Feature: AVX512F
 func (x Int64x8) Absolute() Int64x8
 
 /* AbsoluteMasked */
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSB, CPU Feature: AVX512EVEX
+// Asm: VPABSB, CPU Feature: AVX512BW
 func (x Int8x16) AbsoluteMasked(y Mask8x16) Int8x16
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSB, CPU Feature: AVX512EVEX
+// Asm: VPABSB, CPU Feature: AVX512BW
 func (x Int8x32) AbsoluteMasked(y Mask8x32) Int8x32
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSB, CPU Feature: AVX512EVEX
+// Asm: VPABSB, CPU Feature: AVX512BW
 func (x Int8x64) AbsoluteMasked(y Mask8x64) Int8x64
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSW, CPU Feature: AVX512EVEX
+// Asm: VPABSW, CPU Feature: AVX512BW
 func (x Int16x8) AbsoluteMasked(y Mask16x8) Int16x8
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSW, CPU Feature: AVX512EVEX
+// Asm: VPABSW, CPU Feature: AVX512BW
 func (x Int16x16) AbsoluteMasked(y Mask16x16) Int16x16
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSW, CPU Feature: AVX512EVEX
+// Asm: VPABSW, CPU Feature: AVX512BW
 func (x Int16x32) AbsoluteMasked(y Mask16x32) Int16x32
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSD, CPU Feature: AVX512EVEX
+// Asm: VPABSD, CPU Feature: AVX512F
 func (x Int32x4) AbsoluteMasked(y Mask32x4) Int32x4
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSD, CPU Feature: AVX512EVEX
+// Asm: VPABSD, CPU Feature: AVX512F
 func (x Int32x8) AbsoluteMasked(y Mask32x8) Int32x8
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSD, CPU Feature: AVX512EVEX
+// Asm: VPABSD, CPU Feature: AVX512F
 func (x Int32x16) AbsoluteMasked(y Mask32x16) Int32x16
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
+// Asm: VPABSQ, CPU Feature: AVX512F
 func (x Int64x2) AbsoluteMasked(y Mask64x2) Int64x2
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
+// Asm: VPABSQ, CPU Feature: AVX512F
 func (x Int64x4) AbsoluteMasked(y Mask64x4) Int64x4
 
 // AbsoluteMasked computes the absolute value of each element.
 //
-// Asm: VPABSQ, CPU Feature: AVX512EVEX
+// Asm: VPABSQ, CPU Feature: AVX512F
 func (x Int64x8) AbsoluteMasked(y Mask64x8) Int64x8
 
 /* Add */
@@ -142,7 +142,7 @@ func (x Float32x8) Add(y Float32x8) Float32x8
 
 // Add adds corresponding elements of two vectors.
 //
-// Asm: VADDPS, CPU Feature: AVX512EVEX
+// Asm: VADDPS, CPU Feature: AVX512F
 func (x Float32x16) Add(y Float32x16) Float32x16
 
 // Add adds corresponding elements of two vectors.
@@ -157,7 +157,7 @@ func (x Float64x4) Add(y Float64x4) Float64x4
 
 // Add adds corresponding elements of two vectors.
 //
-// Asm: VADDPD, CPU Feature: AVX512EVEX
+// Asm: VADDPD, CPU Feature: AVX512F
 func (x Float64x8) Add(y Float64x8) Float64x8
 
 // Add adds corresponding elements of two vectors.
@@ -172,7 +172,7 @@ func (x Int8x32) Add(y Int8x32) Int8x32
 
 // Add adds corresponding elements of two vectors.
 //
-// Asm: VPADDB, CPU Feature: AVX512EVEX
+// Asm: VPADDB, CPU Feature: AVX512BW
 func (x Int8x64) Add(y Int8x64) Int8x64
 
 // Add adds corresponding elements of two vectors.
@@ -187,7 +187,7 @@ func (x Int16x16) Add(y Int16x16) Int16x16
 
 // Add adds corresponding elements of two vectors.
 //
-// Asm: VPADDW, CPU Feature: AVX512EVEX
+// Asm: VPADDW, CPU Feature: AVX512BW
 func (x Int16x32) Add(y Int16x32) Int16x32
 
 // Add adds corresponding elements of two vectors.
@@ -202,7 +202,7 @@ func (x Int32x8) Add(y Int32x8) Int32x8
 
 // Add adds corresponding elements of two vectors.
 //
-// Asm: VPADDD, CPU Feature: AVX512EVEX
+// Asm: VPADDD, CPU Feature: AVX512F
 func (x Int32x16) Add(y Int32x16) Int32x16
 
 // Add adds corresponding elements of two vectors.
@@ -217,7 +217,7 @@ func (x Int64x4) Add(y Int64x4) Int64x4
 
 // Add adds corresponding elements of two vectors.
 //
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
+// Asm: VPADDQ, CPU Feature: AVX512F
 func (x Int64x8) Add(y Int64x8) Int64x8
 
 // Add adds corresponding elements of two vectors.
@@ -232,7 +232,7 @@ func (x Uint8x32) Add(y Uint8x32) Uint8x32
 
 // Add adds corresponding elements of two vectors.
 //
-// Asm: VPADDB, CPU Feature: AVX512EVEX
+// Asm: VPADDB, CPU Feature: AVX512BW
 func (x Uint8x64) Add(y Uint8x64) Uint8x64
 
 // Add adds corresponding elements of two vectors.
@@ -247,7 +247,7 @@ func (x Uint16x16) Add(y Uint16x16) Uint16x16
 
 // Add adds corresponding elements of two vectors.
 //
-// Asm: VPADDW, CPU Feature: AVX512EVEX
+// Asm: VPADDW, CPU Feature: AVX512BW
 func (x Uint16x32) Add(y Uint16x32) Uint16x32
 
 // Add adds corresponding elements of two vectors.
@@ -262,7 +262,7 @@ func (x Uint32x8) Add(y Uint32x8) Uint32x8
 
 // Add adds corresponding elements of two vectors.
 //
-// Asm: VPADDD, CPU Feature: AVX512EVEX
+// Asm: VPADDD, CPU Feature: AVX512F
 func (x Uint32x16) Add(y Uint32x16) Uint32x16
 
 // Add adds corresponding elements of two vectors.
@@ -277,159 +277,159 @@ func (x Uint64x4) Add(y Uint64x4) Uint64x4
 
 // Add adds corresponding elements of two vectors.
 //
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
+// Asm: VPADDQ, CPU Feature: AVX512F
 func (x Uint64x8) Add(y Uint64x8) Uint64x8
 
 /* AddMasked */
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VADDPS, CPU Feature: AVX512EVEX
+// Asm: VADDPS, CPU Feature: AVX512F
 func (x Float32x4) AddMasked(y Float32x4, z Mask32x4) Float32x4
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VADDPS, CPU Feature: AVX512EVEX
+// Asm: VADDPS, CPU Feature: AVX512F
 func (x Float32x8) AddMasked(y Float32x8, z Mask32x8) Float32x8
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VADDPS, CPU Feature: AVX512EVEX
+// Asm: VADDPS, CPU Feature: AVX512F
 func (x Float32x16) AddMasked(y Float32x16, z Mask32x16) Float32x16
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VADDPD, CPU Feature: AVX512EVEX
+// Asm: VADDPD, CPU Feature: AVX512F
 func (x Float64x2) AddMasked(y Float64x2, z Mask64x2) Float64x2
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VADDPD, CPU Feature: AVX512EVEX
+// Asm: VADDPD, CPU Feature: AVX512F
 func (x Float64x4) AddMasked(y Float64x4, z Mask64x4) Float64x4
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VADDPD, CPU Feature: AVX512EVEX
+// Asm: VADDPD, CPU Feature: AVX512F
 func (x Float64x8) AddMasked(y Float64x8, z Mask64x8) Float64x8
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDB, CPU Feature: AVX512EVEX
+// Asm: VPADDB, CPU Feature: AVX512BW
 func (x Int8x16) AddMasked(y Int8x16, z Mask8x16) Int8x16
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDB, CPU Feature: AVX512EVEX
+// Asm: VPADDB, CPU Feature: AVX512BW
 func (x Int8x32) AddMasked(y Int8x32, z Mask8x32) Int8x32
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDB, CPU Feature: AVX512EVEX
+// Asm: VPADDB, CPU Feature: AVX512BW
 func (x Int8x64) AddMasked(y Int8x64, z Mask8x64) Int8x64
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDW, CPU Feature: AVX512EVEX
+// Asm: VPADDW, CPU Feature: AVX512BW
 func (x Int16x8) AddMasked(y Int16x8, z Mask16x8) Int16x8
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDW, CPU Feature: AVX512EVEX
+// Asm: VPADDW, CPU Feature: AVX512BW
 func (x Int16x16) AddMasked(y Int16x16, z Mask16x16) Int16x16
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDW, CPU Feature: AVX512EVEX
+// Asm: VPADDW, CPU Feature: AVX512BW
 func (x Int16x32) AddMasked(y Int16x32, z Mask16x32) Int16x32
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDD, CPU Feature: AVX512EVEX
+// Asm: VPADDD, CPU Feature: AVX512F
 func (x Int32x4) AddMasked(y Int32x4, z Mask32x4) Int32x4
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDD, CPU Feature: AVX512EVEX
+// Asm: VPADDD, CPU Feature: AVX512F
 func (x Int32x8) AddMasked(y Int32x8, z Mask32x8) Int32x8
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDD, CPU Feature: AVX512EVEX
+// Asm: VPADDD, CPU Feature: AVX512F
 func (x Int32x16) AddMasked(y Int32x16, z Mask32x16) Int32x16
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
+// Asm: VPADDQ, CPU Feature: AVX512F
 func (x Int64x2) AddMasked(y Int64x2, z Mask64x2) Int64x2
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
+// Asm: VPADDQ, CPU Feature: AVX512F
 func (x Int64x4) AddMasked(y Int64x4, z Mask64x4) Int64x4
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
+// Asm: VPADDQ, CPU Feature: AVX512F
 func (x Int64x8) AddMasked(y Int64x8, z Mask64x8) Int64x8
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDB, CPU Feature: AVX512EVEX
+// Asm: VPADDB, CPU Feature: AVX512BW
 func (x Uint8x16) AddMasked(y Uint8x16, z Mask8x16) Uint8x16
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDB, CPU Feature: AVX512EVEX
+// Asm: VPADDB, CPU Feature: AVX512BW
 func (x Uint8x32) AddMasked(y Uint8x32, z Mask8x32) Uint8x32
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDB, CPU Feature: AVX512EVEX
+// Asm: VPADDB, CPU Feature: AVX512BW
 func (x Uint8x64) AddMasked(y Uint8x64, z Mask8x64) Uint8x64
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDW, CPU Feature: AVX512EVEX
+// Asm: VPADDW, CPU Feature: AVX512BW
 func (x Uint16x8) AddMasked(y Uint16x8, z Mask16x8) Uint16x8
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDW, CPU Feature: AVX512EVEX
+// Asm: VPADDW, CPU Feature: AVX512BW
 func (x Uint16x16) AddMasked(y Uint16x16, z Mask16x16) Uint16x16
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDW, CPU Feature: AVX512EVEX
+// Asm: VPADDW, CPU Feature: AVX512BW
 func (x Uint16x32) AddMasked(y Uint16x32, z Mask16x32) Uint16x32
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDD, CPU Feature: AVX512EVEX
+// Asm: VPADDD, CPU Feature: AVX512F
 func (x Uint32x4) AddMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDD, CPU Feature: AVX512EVEX
+// Asm: VPADDD, CPU Feature: AVX512F
 func (x Uint32x8) AddMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDD, CPU Feature: AVX512EVEX
+// Asm: VPADDD, CPU Feature: AVX512F
 func (x Uint32x16) AddMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
+// Asm: VPADDQ, CPU Feature: AVX512F
 func (x Uint64x2) AddMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
+// Asm: VPADDQ, CPU Feature: AVX512F
 func (x Uint64x4) AddMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // AddMasked adds corresponding elements of two vectors.
 //
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
+// Asm: VPADDQ, CPU Feature: AVX512F
 func (x Uint64x8) AddMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* AddSub */
@@ -488,7 +488,7 @@ func (x Int32x8) And(y Int32x8) Int32x8
 
-// And performs a masked bitwise AND operation between two vectors.
+// And performs a bitwise AND operation between two vectors.
 //
-// Asm: VPANDD, CPU Feature: AVX512EVEX
+// Asm: VPANDD, CPU Feature: AVX512F
 func (x Int32x16) And(y Int32x16) Int32x16
 
 // And performs a bitwise AND operation between two vectors.
@@ -503,7 +503,7 @@ func (x Int64x4) And(y Int64x4) Int64x4
 
-// And performs a masked bitwise AND operation between two vectors.
+// And performs a bitwise AND operation between two vectors.
 //
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
+// Asm: VPANDQ, CPU Feature: AVX512F
 func (x Int64x8) And(y Int64x8) Int64x8
 
 // And performs a bitwise AND operation between two vectors.
@@ -538,7 +538,7 @@ func (x Uint32x8) And(y Uint32x8) Uint32x8
 
-// And performs a masked bitwise AND operation between two vectors.
+// And performs a bitwise AND operation between two vectors.
 //
-// Asm: VPANDD, CPU Feature: AVX512EVEX
+// Asm: VPANDD, CPU Feature: AVX512F
 func (x Uint32x16) And(y Uint32x16) Uint32x16
 
 // And performs a bitwise AND operation between two vectors.
@@ -553,69 +553,69 @@ func (x Uint64x4) And(y Uint64x4) Uint64x4
 
-// And performs a masked bitwise AND operation between two vectors.
+// And performs a bitwise AND operation between two vectors.
 //
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
+// Asm: VPANDQ, CPU Feature: AVX512F
 func (x Uint64x8) And(y Uint64x8) Uint64x8
 
 /* AndMasked */
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDD, CPU Feature: AVX512EVEX
+// Asm: VPANDD, CPU Feature: AVX512F
 func (x Int32x4) AndMasked(y Int32x4, z Mask32x4) Int32x4
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDD, CPU Feature: AVX512EVEX
+// Asm: VPANDD, CPU Feature: AVX512F
 func (x Int32x8) AndMasked(y Int32x8, z Mask32x8) Int32x8
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDD, CPU Feature: AVX512EVEX
+// Asm: VPANDD, CPU Feature: AVX512F
 func (x Int32x16) AndMasked(y Int32x16, z Mask32x16) Int32x16
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
+// Asm: VPANDQ, CPU Feature: AVX512F
 func (x Int64x2) AndMasked(y Int64x2, z Mask64x2) Int64x2
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
+// Asm: VPANDQ, CPU Feature: AVX512F
 func (x Int64x4) AndMasked(y Int64x4, z Mask64x4) Int64x4
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
+// Asm: VPANDQ, CPU Feature: AVX512F
 func (x Int64x8) AndMasked(y Int64x8, z Mask64x8) Int64x8
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDD, CPU Feature: AVX512EVEX
+// Asm: VPANDD, CPU Feature: AVX512F
 func (x Uint32x4) AndMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDD, CPU Feature: AVX512EVEX
+// Asm: VPANDD, CPU Feature: AVX512F
 func (x Uint32x8) AndMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDD, CPU Feature: AVX512EVEX
+// Asm: VPANDD, CPU Feature: AVX512F
 func (x Uint32x16) AndMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
+// Asm: VPANDQ, CPU Feature: AVX512F
 func (x Uint64x2) AndMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
+// Asm: VPANDQ, CPU Feature: AVX512F
 func (x Uint64x4) AndMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // AndMasked performs a masked bitwise AND operation between two vectors.
 //
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
+// Asm: VPANDQ, CPU Feature: AVX512F
 func (x Uint64x8) AndMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* AndNot */
@@ -652,7 +652,7 @@ func (x Int32x8) AndNot(y Int32x8) Int32x8
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDND, CPU Feature: AVX512EVEX
+// Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x16) AndNot(y Int32x16) Int32x16
 
 // AndNot performs a bitwise AND NOT operation between two vectors.
@@ -667,7 +667,7 @@ func (x Int64x4) AndNot(y Int64x4) Int64x4
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+// Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x8) AndNot(y Int64x8) Int64x8
 
 // AndNot performs a bitwise AND NOT operation between two vectors.
@@ -702,7 +702,7 @@ func (x Uint32x8) AndNot(y Uint32x8) Uint32x8
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDND, CPU Feature: AVX512EVEX
+// Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x16) AndNot(y Uint32x16) Uint32x16
 
 // AndNot performs a bitwise AND NOT operation between two vectors.
@@ -717,133 +717,133 @@ func (x Uint64x4) AndNot(y Uint64x4) Uint64x4
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+// Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
 
 /* AndNotMasked */
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDND, CPU Feature: AVX512EVEX
+// Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x4) AndNotMasked(y Int32x4, z Mask32x4) Int32x4
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDND, CPU Feature: AVX512EVEX
+// Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x8) AndNotMasked(y Int32x8, z Mask32x8) Int32x8
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDND, CPU Feature: AVX512EVEX
+// Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x16) AndNotMasked(y Int32x16, z Mask32x16) Int32x16
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+// Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x2) AndNotMasked(y Int64x2, z Mask64x2) Int64x2
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+// Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x4) AndNotMasked(y Int64x4, z Mask64x4) Int64x4
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+// Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x8) AndNotMasked(y Int64x8, z Mask64x8) Int64x8
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDND, CPU Feature: AVX512EVEX
+// Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x4) AndNotMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDND, CPU Feature: AVX512EVEX
+// Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x8) AndNotMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDND, CPU Feature: AVX512EVEX
+// Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x16) AndNotMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+// Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x2) AndNotMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+// Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x4) AndNotMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
 //
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+// Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x8) AndNotMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* ApproximateReciprocal */
 
 // ApproximateReciprocal computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+// Asm: VRCP14PS, CPU Feature: AVX512F
 func (x Float32x4) ApproximateReciprocal() Float32x4
 
 // ApproximateReciprocal computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+// Asm: VRCP14PS, CPU Feature: AVX512F
 func (x Float32x8) ApproximateReciprocal() Float32x8
 
 // ApproximateReciprocal computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+// Asm: VRCP14PS, CPU Feature: AVX512F
 func (x Float32x16) ApproximateReciprocal() Float32x16
 
 // ApproximateReciprocal computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+// Asm: VRCP14PD, CPU Feature: AVX512F
 func (x Float64x2) ApproximateReciprocal() Float64x2
 
 // ApproximateReciprocal computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+// Asm: VRCP14PD, CPU Feature: AVX512F
 func (x Float64x4) ApproximateReciprocal() Float64x4
 
 // ApproximateReciprocal computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+// Asm: VRCP14PD, CPU Feature: AVX512F
 func (x Float64x8) ApproximateReciprocal() Float64x8
 
 /* ApproximateReciprocalMasked */
 
 // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+// Asm: VRCP14PS, CPU Feature: AVX512F
 func (x Float32x4) ApproximateReciprocalMasked(y Mask32x4) Float32x4
 
 // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+// Asm: VRCP14PS, CPU Feature: AVX512F
 func (x Float32x8) ApproximateReciprocalMasked(y Mask32x8) Float32x8
 
 // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+// Asm: VRCP14PS, CPU Feature: AVX512F
 func (x Float32x16) ApproximateReciprocalMasked(y Mask32x16) Float32x16
 
 // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+// Asm: VRCP14PD, CPU Feature: AVX512F
 func (x Float64x2) ApproximateReciprocalMasked(y Mask64x2) Float64x2
 
 // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+// Asm: VRCP14PD, CPU Feature: AVX512F
 func (x Float64x4) ApproximateReciprocalMasked(y Mask64x4) Float64x4
 
 // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
 //
-// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+// Asm: VRCP14PD, CPU Feature: AVX512F
 func (x Float64x8) ApproximateReciprocalMasked(y Mask64x8) Float64x8
 
 /* ApproximateReciprocalOfSqrt */
@@ -860,54 +860,54 @@ func (x Float32x8) ApproximateReciprocalOfSqrt() Float32x8
 
 // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
 //
-// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
+// Asm: VRSQRT14PS, CPU Feature: AVX512F
 func (x Float32x16) ApproximateReciprocalOfSqrt() Float32x16
 
 // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
 //
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
 func (x Float64x2) ApproximateReciprocalOfSqrt() Float64x2
 
 // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
 //
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
 func (x Float64x4) ApproximateReciprocalOfSqrt() Float64x4
 
 // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
 //
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
 func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8
 
 /* ApproximateReciprocalOfSqrtMasked */
 
 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
 //
-// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
+// Asm: VRSQRT14PS, CPU Feature: AVX512F
 func (x Float32x4) ApproximateReciprocalOfSqrtMasked(y Mask32x4) Float32x4
 
 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
 //
-// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
+// Asm: VRSQRT14PS, CPU Feature: AVX512F
 func (x Float32x8) ApproximateReciprocalOfSqrtMasked(y Mask32x8) Float32x8
 
 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
 //
-// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
+// Asm: VRSQRT14PS, CPU Feature: AVX512F
 func (x Float32x16) ApproximateReciprocalOfSqrtMasked(y Mask32x16) Float32x16
 
 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
 //
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
 func (x Float64x2) ApproximateReciprocalOfSqrtMasked(y Mask64x2) Float64x2
 
 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
 //
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
 func (x Float64x4) ApproximateReciprocalOfSqrtMasked(y Mask64x4) Float64x4
 
 // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
 //
-// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
 func (x Float64x8) ApproximateReciprocalOfSqrtMasked(y Mask64x8) Float64x8
 
 /* Average */
@@ -924,7 +924,7 @@ func (x Uint8x32) Average(y Uint8x32) Uint8x32
 
 // Average computes the rounded average of corresponding elements.
 //
-// Asm: VPAVGB, CPU Feature: AVX512EVEX
+// Asm: VPAVGB, CPU Feature: AVX512BW
 func (x Uint8x64) Average(y Uint8x64) Uint8x64
 
 // Average computes the rounded average of corresponding elements.
@@ -939,39 +939,39 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16
 
 // Average computes the rounded average of corresponding elements.
 //
-// Asm: VPAVGW, CPU Feature: AVX512EVEX
+// Asm: VPAVGW, CPU Feature: AVX512BW
 func (x Uint16x32) Average(y Uint16x32) Uint16x32
 
 /* AverageMasked */
 
 // AverageMasked computes the rounded average of corresponding elements.
 //
-// Asm: VPAVGB, CPU Feature: AVX512EVEX
+// Asm: VPAVGB, CPU Feature: AVX512BW
 func (x Uint8x16) AverageMasked(y Uint8x16, z Mask8x16) Uint8x16
 
 // AverageMasked computes the rounded average of corresponding elements.
 //
-// Asm: VPAVGB, CPU Feature: AVX512EVEX
+// Asm: VPAVGB, CPU Feature: AVX512BW
 func (x Uint8x32) AverageMasked(y Uint8x32, z Mask8x32) Uint8x32
 
 // AverageMasked computes the rounded average of corresponding elements.
 //
-// Asm: VPAVGB, CPU Feature: AVX512EVEX
+// Asm: VPAVGB, CPU Feature: AVX512BW
 func (x Uint8x64) AverageMasked(y Uint8x64, z Mask8x64) Uint8x64
 
 // AverageMasked computes the rounded average of corresponding elements.
 //
-// Asm: VPAVGW, CPU Feature: AVX512EVEX
+// Asm: VPAVGW, CPU Feature: AVX512BW
 func (x Uint16x8) AverageMasked(y Uint16x8, z Mask16x8) Uint16x8
 
 // AverageMasked computes the rounded average of corresponding elements.
 //
-// Asm: VPAVGW, CPU Feature: AVX512EVEX
+// Asm: VPAVGW, CPU Feature: AVX512BW
 func (x Uint16x16) AverageMasked(y Uint16x16, z Mask16x16) Uint16x16
 
 // AverageMasked computes the rounded average of corresponding elements.
 //
-// Asm: VPAVGW, CPU Feature: AVX512EVEX
+// Asm: VPAVGW, CPU Feature: AVX512BW
 func (x Uint16x32) AverageMasked(y Uint16x32, z Mask16x32) Uint16x32
 
 /* Ceil */
@@ -1002,42 +1002,42 @@ func (x Float64x4) Ceil() Float64x4
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x4) CeilWithPrecision(prec uint8) Float32x4
 
-// CeilWithPrecision rounds elements up with specified precision, masked.
+// CeilWithPrecision rounds elements up with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x8) CeilWithPrecision(prec uint8) Float32x8
 
-// CeilWithPrecision rounds elements up with specified precision, masked.
+// CeilWithPrecision rounds elements up with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x16) CeilWithPrecision(prec uint8) Float32x16
 
-// CeilWithPrecision rounds elements up with specified precision, masked.
+// CeilWithPrecision rounds elements up with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x2) CeilWithPrecision(prec uint8) Float64x2
 
-// CeilWithPrecision rounds elements up with specified precision, masked.
+// CeilWithPrecision rounds elements up with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x4) CeilWithPrecision(prec uint8) Float64x4
 
-// CeilWithPrecision rounds elements up with specified precision, masked.
+// CeilWithPrecision rounds elements up with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x8) CeilWithPrecision(prec uint8) Float64x8
 
 /* CeilWithPrecisionMasked */
@@ -1046,42 +1046,42 @@ func (x Float64x8) CeilWithPrecision(prec uint8) Float64x8
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x4) CeilWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
 
 // CeilWithPrecisionMasked rounds elements up with specified precision, masked.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x8) CeilWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
 
 // CeilWithPrecisionMasked rounds elements up with specified precision, masked.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x16) CeilWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
 
 // CeilWithPrecisionMasked rounds elements up with specified precision, masked.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x2) CeilWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
 
 // CeilWithPrecisionMasked rounds elements up with specified precision, masked.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x4) CeilWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
 
 // CeilWithPrecisionMasked rounds elements up with specified precision, masked.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x8) CeilWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
 
 /* DiffWithCeilWithPrecision */
@@ -1090,42 +1090,42 @@ func (x Float64x8) CeilWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x4) DiffWithCeilWithPrecision(prec uint8) Float32x4
 
 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x8) DiffWithCeilWithPrecision(prec uint8) Float32x8
 
 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x16) DiffWithCeilWithPrecision(prec uint8) Float32x16
 
 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x2) DiffWithCeilWithPrecision(prec uint8) Float64x2
 
 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x4) DiffWithCeilWithPrecision(prec uint8) Float64x4
 
 // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x8) DiffWithCeilWithPrecision(prec uint8) Float64x8
 
 /* DiffWithCeilWithPrecisionMasked */
@@ -1134,42 +1134,42 @@ func (x Float64x8) DiffWithCeilWithPrecision(prec uint8) Float64x8
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x4) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
 
 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x8) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
 
 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x16) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
 
 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x2) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
 
 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x4) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
 
 // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x8) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
 
 /* DiffWithFloorWithPrecision */
@@ -1178,42 +1178,42 @@ func (x Float64x8) DiffWithCeilWithPrecisionMasked(prec uint8, y Mask64x8) Float
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x4) DiffWithFloorWithPrecision(prec uint8) Float32x4
 
 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x8) DiffWithFloorWithPrecision(prec uint8) Float32x8
 
 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x16) DiffWithFloorWithPrecision(prec uint8) Float32x16
 
 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x2) DiffWithFloorWithPrecision(prec uint8) Float64x2
 
 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x4) DiffWithFloorWithPrecision(prec uint8) Float64x4
 
 // DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x8) DiffWithFloorWithPrecision(prec uint8) Float64x8
 
 /* DiffWithFloorWithPrecisionMasked */
@@ -1222,42 +1222,42 @@ func (x Float64x8) DiffWithFloorWithPrecision(prec uint8) Float64x8
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x4) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
 
 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x8) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
 
 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
 
 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
 
 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
 
 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
 
 /* DiffWithRoundWithPrecision */
@@ -1266,42 +1266,42 @@ func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, y Mask64x8) Floa
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x4) DiffWithRoundWithPrecision(prec uint8) Float32x4
 
 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x8) DiffWithRoundWithPrecision(prec uint8) Float32x8
 
 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x16) DiffWithRoundWithPrecision(prec uint8) Float32x16
 
 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x2) DiffWithRoundWithPrecision(prec uint8) Float64x2
 
 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x4) DiffWithRoundWithPrecision(prec uint8) Float64x4
 
 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x8) DiffWithRoundWithPrecision(prec uint8) Float64x8
 
 /* DiffWithRoundWithPrecisionMasked */
@@ -1310,42 +1310,42 @@ func (x Float64x8) DiffWithRoundWithPrecision(prec uint8) Float64x8
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x4) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
 
 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x8) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
 
 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x16) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
 
 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x2) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
 
 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x4) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
 
 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x8) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
 
 /* DiffWithTruncWithPrecision */
@@ -1354,42 +1354,42 @@ func (x Float64x8) DiffWithRoundWithPrecisionMasked(prec uint8, y Mask64x8) Floa
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x4) DiffWithTruncWithPrecision(prec uint8) Float32x4
 
 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x8) DiffWithTruncWithPrecision(prec uint8) Float32x8
 
 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x16) DiffWithTruncWithPrecision(prec uint8) Float32x16
 
 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x2) DiffWithTruncWithPrecision(prec uint8) Float64x2
 
 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x4) DiffWithTruncWithPrecision(prec uint8) Float64x4
 
 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x8) DiffWithTruncWithPrecision(prec uint8) Float64x8
 
 /* DiffWithTruncWithPrecisionMasked */
@@ -1398,42 +1398,42 @@ func (x Float64x8) DiffWithTruncWithPrecision(prec uint8) Float64x8
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x4) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
 
 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x8) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
 
 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
 func (x Float32x16) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
 
 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x2) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
 
 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x4) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
 
 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
 func (x Float64x8) DiffWithTruncWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
 
 /* Div */
@@ -1450,7 +1450,7 @@ func (x Float32x8) Div(y Float32x8) Float32x8
 
 // Div divides elements of two vectors.
 //
-// Asm: VDIVPS, CPU Feature: AVX512EVEX
+// Asm: VDIVPS, CPU Feature: AVX512F
 func (x Float32x16) Div(y Float32x16) Float32x16
 
 // Div divides elements of two vectors.
@@ -1465,39 +1465,39 @@ func (x Float64x4) Div(y Float64x4) Float64x4
 
 // Div divides elements of two vectors.
 //
-// Asm: VDIVPD, CPU Feature: AVX512EVEX
+// Asm: VDIVPD, CPU Feature: AVX512F
 func (x Float64x8) Div(y Float64x8) Float64x8
 
 /* DivMasked */
 
 // DivMasked divides elements of two vectors.
 //
-// Asm: VDIVPS, CPU Feature: AVX512EVEX
+// Asm: VDIVPS, CPU Feature: AVX512F
 func (x Float32x4) DivMasked(y Float32x4, z Mask32x4) Float32x4
 
 // DivMasked divides elements of two vectors.
 //
-// Asm: VDIVPS, CPU Feature: AVX512EVEX
+// Asm: VDIVPS, CPU Feature: AVX512F
 func (x Float32x8) DivMasked(y Float32x8, z Mask32x8) Float32x8
 
 // DivMasked divides elements of two vectors.
 //
-// Asm: VDIVPS, CPU Feature: AVX512EVEX
+// Asm: VDIVPS, CPU Feature: AVX512F
 func (x Float32x16) DivMasked(y Float32x16, z Mask32x16) Float32x16
 
 // DivMasked divides elements of two vectors.
 //
-// Asm: VDIVPD, CPU Feature: AVX512EVEX
+// Asm: VDIVPD, CPU Feature: AVX512F
 func (x Float64x2) DivMasked(y Float64x2, z Mask64x2) Float64x2
 
 // DivMasked divides elements of two vectors.
 //
-// Asm: VDIVPD, CPU Feature: AVX512EVEX
+// Asm: VDIVPD, CPU Feature: AVX512F
 func (x Float64x4) DivMasked(y Float64x4, z Mask64x4) Float64x4
 
 // DivMasked divides elements of two vectors.
 //
-// Asm: VDIVPD, CPU Feature: AVX512EVEX
+// Asm: VDIVPD, CPU Feature: AVX512F
 func (x Float64x8) DivMasked(y Float64x8, z Mask64x8) Float64x8
 
 /* DotProdBroadcast */
@@ -1601,7 +1601,7 @@ func (x Float32x8) Equal(y Float32x8) Mask32x8
 
-// Equal compares for equality, masked.
+// Equal compares for equality.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) Equal(y Float32x16) Mask32x16
 
 // Equal compares for equality.
@@ -1616,199 +1616,199 @@ func (x Float64x4) Equal(y Float64x4) Mask64x4
 
-// Equal compares for equality, masked.
+// Equal compares for equality.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) Equal(y Float64x8) Mask64x8
 
-// Equal compares for equality, masked.
+// Equal compares for equality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) Equal(y Int8x64) Mask8x64
 
-// Equal compares for equality, masked.
+// Equal compares for equality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) Equal(y Int16x32) Mask16x32
 
-// Equal compares for equality, masked.
+// Equal compares for equality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) Equal(y Int32x16) Mask32x16
 
-// Equal compares for equality, masked.
+// Equal compares for equality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) Equal(y Int64x8) Mask64x8
 
-// Equal compares for equality, masked.
+// Equal compares for equality.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) Equal(y Uint8x64) Mask8x64
 
-// Equal compares for equality, masked.
+// Equal compares for equality.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) Equal(y Uint16x32) Mask16x32
 
-// Equal compares for equality, masked.
+// Equal compares for equality.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) Equal(y Uint32x16) Mask32x16
 
-// Equal compares for equality, masked.
+// Equal compares for equality.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) Equal(y Uint64x8) Mask64x8
 
 /* EqualMasked */
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x4) EqualMasked(y Float32x4, z Mask32x4) Mask32x4
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x8) EqualMasked(y Float32x8, z Mask32x8) Mask32x8
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) EqualMasked(y Float32x16, z Mask32x16) Mask32x16
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x2) EqualMasked(y Float64x2, z Mask64x2) Mask64x2
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x4) EqualMasked(y Float64x4, z Mask64x4) Mask64x4
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) EqualMasked(y Float64x8, z Mask64x8) Mask64x8
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x16) EqualMasked(y Int8x16, z Mask8x16) Mask8x16
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x32) EqualMasked(y Int8x32, z Mask8x32) Mask8x32
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) EqualMasked(y Int8x64, z Mask8x64) Mask8x64
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x8) EqualMasked(y Int16x8, z Mask16x8) Mask16x8
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x16) EqualMasked(y Int16x16, z Mask16x16) Mask16x16
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) EqualMasked(y Int16x32, z Mask16x32) Mask16x32
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x4) EqualMasked(y Int32x4, z Mask32x4) Mask32x4
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x8) EqualMasked(y Int32x8, z Mask32x8) Mask32x8
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) EqualMasked(y Int32x16, z Mask32x16) Mask32x16
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x2) EqualMasked(y Int64x2, z Mask64x2) Mask64x2
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x4) EqualMasked(y Int64x4, z Mask64x4) Mask64x4
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) EqualMasked(y Int64x8, z Mask64x8) Mask64x8
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) EqualMasked(y Uint8x16, z Mask8x16) Mask8x16
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) EqualMasked(y Uint8x32, z Mask8x32) Mask8x32
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) EqualMasked(y Uint8x64, z Mask8x64) Mask8x64
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) EqualMasked(y Uint16x8, z Mask16x8) Mask16x8
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) EqualMasked(y Uint16x16, z Mask16x16) Mask16x16
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) EqualMasked(y Uint16x32, z Mask16x32) Mask16x32
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) EqualMasked(y Uint32x4, z Mask32x4) Mask32x4
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) EqualMasked(y Uint32x8, z Mask32x8) Mask32x8
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) EqualMasked(y Uint32x16, z Mask32x16) Mask32x16
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) EqualMasked(y Uint64x2, z Mask64x2) Mask64x2
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) EqualMasked(y Uint64x4, z Mask64x4) Mask64x4
 
 // EqualMasked compares for equality, masked.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) EqualMasked(y Uint64x8, z Mask64x8) Mask64x8
 
 /* Floor */
@@ -1839,42 +1839,42 @@ func (x Float64x4) Floor() Float64x4
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x4) FloorWithPrecision(prec uint8) Float32x4
 
-// FloorWithPrecision rounds elements down with specified precision, masked.
+// FloorWithPrecision rounds elements down with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x8) FloorWithPrecision(prec uint8) Float32x8
 
-// FloorWithPrecision rounds elements down with specified precision, masked.
+// FloorWithPrecision rounds elements down with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x16) FloorWithPrecision(prec uint8) Float32x16
 
-// FloorWithPrecision rounds elements down with specified precision, masked.
+// FloorWithPrecision rounds elements down with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x2) FloorWithPrecision(prec uint8) Float64x2
 
-// FloorWithPrecision rounds elements down with specified precision, masked.
+// FloorWithPrecision rounds elements down with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x4) FloorWithPrecision(prec uint8) Float64x4
 
-// FloorWithPrecision rounds elements down with specified precision, masked.
+// FloorWithPrecision rounds elements down with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x8) FloorWithPrecision(prec uint8) Float64x8
 
 /* FloorWithPrecisionMasked */
@@ -1883,234 +1883,234 @@ func (x Float64x8) FloorWithPrecision(prec uint8) Float64x8
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x4) FloorWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
 
 // FloorWithPrecisionMasked rounds elements down with specified precision, masked.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x8) FloorWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
 
 // FloorWithPrecisionMasked rounds elements down with specified precision, masked.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x16) FloorWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
 
 // FloorWithPrecisionMasked rounds elements down with specified precision, masked.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x2) FloorWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
 
 // FloorWithPrecisionMasked rounds elements down with specified precision, masked.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x4) FloorWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
 
 // FloorWithPrecisionMasked rounds elements down with specified precision, masked.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x8) FloorWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
 
 /* FusedMultiplyAdd */
 
 // FusedMultiplyAdd performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PS, CPU Feature: AVX512F
 func (x Float32x4) FusedMultiplyAdd(y Float32x4, z Float32x4) Float32x4
 
 // FusedMultiplyAdd performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PS, CPU Feature: AVX512F
 func (x Float32x8) FusedMultiplyAdd(y Float32x8, z Float32x8) Float32x8
 
 // FusedMultiplyAdd performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PS, CPU Feature: AVX512F
 func (x Float32x16) FusedMultiplyAdd(y Float32x16, z Float32x16) Float32x16
 
 // FusedMultiplyAdd performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PD, CPU Feature: AVX512F
 func (x Float64x2) FusedMultiplyAdd(y Float64x2, z Float64x2) Float64x2
 
 // FusedMultiplyAdd performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PD, CPU Feature: AVX512F
 func (x Float64x4) FusedMultiplyAdd(y Float64x4, z Float64x4) Float64x4
 
 // FusedMultiplyAdd performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PD, CPU Feature: AVX512F
 func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8
 
 /* FusedMultiplyAddMasked */
 
 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PS, CPU Feature: AVX512F
 func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4
 
 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PS, CPU Feature: AVX512F
 func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8
 
 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PS, CPU Feature: AVX512F
 func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16
 
 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PD, CPU Feature: AVX512F
 func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2
 
 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PD, CPU Feature: AVX512F
 func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4
 
 // FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
 //
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADD213PD, CPU Feature: AVX512F
 func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8
 
 /* FusedMultiplyAddSub */
 
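-// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSub performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.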
 //
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
 func (x Float32x4) FusedMultiplyAddSub(y Float32x4, z Float32x4) Float32x4
 
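-// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSub performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.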
 //
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
 func (x Float32x8) FusedMultiplyAddSub(y Float32x8, z Float32x8) Float32x8
 
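-// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSub performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.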
 //
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
 func (x Float32x16) FusedMultiplyAddSub(y Float32x16, z Float32x16) Float32x16
 
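-// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSub performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.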
 //
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
 func (x Float64x2) FusedMultiplyAddSub(y Float64x2, z Float64x2) Float64x2
 
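-// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSub performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.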
 //
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
 func (x Float64x4) FusedMultiplyAddSub(y Float64x4, z Float64x4) Float64x4
 
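-// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSub performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.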
 //
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
 func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8
 
 /* FusedMultiplyAddSubMasked */
 
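-// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSubMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.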
 //
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
 func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4
 
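-// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSubMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.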
 //
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
 func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8
 
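-// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSubMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.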
 //
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
 func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16
 
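-// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSubMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.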
 //
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
 func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2
 
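-// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSubMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.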
 //
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
 func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4
 
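-// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
+// FusedMultiplyAddSubMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.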
 //
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
 func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8
 
 /* FusedMultiplySubAdd */
 
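-// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAdd performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.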
 //
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
 func (x Float32x4) FusedMultiplySubAdd(y Float32x4, z Float32x4) Float32x4
 
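-// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAdd performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.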
 //
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
 func (x Float32x8) FusedMultiplySubAdd(y Float32x8, z Float32x8) Float32x8
 
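-// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAdd performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.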
 //
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
 func (x Float32x16) FusedMultiplySubAdd(y Float32x16, z Float32x16) Float32x16
 
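-// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAdd performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.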
 //
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
 func (x Float64x2) FusedMultiplySubAdd(y Float64x2, z Float64x2) Float64x2
 
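-// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAdd performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.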
 //
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
 func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4
 
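-// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAdd performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.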
 //
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
 func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8
 
 /* FusedMultiplySubAddMasked */
 
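-// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAddMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.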
 //
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
 func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4
 
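-// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAddMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.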
 //
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
 func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8
 
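-// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAddMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.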
 //
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
 func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16
 
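-// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAddMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.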
 //
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
 func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2
 
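-// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAddMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.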
 //
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
 func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4
 
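-// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
+// FusedMultiplySubAddMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.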
 //
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
 func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8
 
 /* GaloisFieldAffineTransform */
@@ -2122,7 +2122,7 @@ func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, u Mask64x
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
 func (x Uint8x16) GaloisFieldAffineTransform(y Uint64x2, b uint8) Uint8x16
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
@@ -2132,7 +2132,7 @@ func (x Uint8x16) GaloisFieldAffineTransform(y Uint64x2, b uint8) Uint8x16
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
 func (x Uint8x32) GaloisFieldAffineTransform(y Uint64x4, b uint8) Uint8x32
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
@@ -2142,7 +2142,7 @@ func (x Uint8x32) GaloisFieldAffineTransform(y Uint64x4, b uint8) Uint8x32
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64
 
 /* GaloisFieldAffineTransformInverse */
@@ -2155,7 +2155,7 @@ func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
 func (x Uint8x16) GaloisFieldAffineTransformInverse(y Uint64x2, b uint8) Uint8x16
 
 // GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
@@ -2166,7 +2166,7 @@ func (x Uint8x16) GaloisFieldAffineTransformInverse(y Uint64x2, b uint8) Uint8x1
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
 func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x32
 
 // GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
@@ -2177,7 +2177,7 @@ func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x3
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x64
 
 /* GaloisFieldAffineTransformInverseMasked */
@@ -2190,7 +2190,7 @@ func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x6
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
 func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, m Mask8x16) Uint8x16
 
 // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
@@ -2201,7 +2201,7 @@ func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, m
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
 func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, m Mask8x32) Uint8x32
 
 // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
@@ -2212,7 +2212,7 @@ func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, m
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, m Mask8x64) Uint8x64
 
 /* GaloisFieldAffineTransformMasked */
@@ -2224,7 +2224,7 @@ func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, m
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
 func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, m Mask8x16) Uint8x16
 
 // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
@@ -2234,7 +2234,7 @@ func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, m Mask8x
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
 func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, m Mask8x32) Uint8x32
 
 // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
@@ -2244,7 +2244,7 @@ func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, m Mask8x
 //
 // b is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldAffineTransformMasked(y Uint64x8, b uint8, m Mask8x64) Uint8x64
 
 /* GaloisFieldMul */
@@ -2252,19 +2252,19 @@ func (x Uint8x64) GaloisFieldAffineTransformMasked(y Uint64x8, b uint8, m Mask8x
 // GaloisFieldMul computes element-wise GF(2^8) multiplication with
 // reduction polynomial x^8 + x^4 + x^3 + x + 1.
 //
-// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
 func (x Uint8x16) GaloisFieldMul(y Uint8x16) Uint8x16
 
 // GaloisFieldMul computes element-wise GF(2^8) multiplication with
 // reduction polynomial x^8 + x^4 + x^3 + x + 1.
 //
-// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
 func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32
 
 // GaloisFieldMul computes element-wise GF(2^8) multiplication with
 // reduction polynomial x^8 + x^4 + x^3 + x + 1.
 //
-// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
 
 /* GaloisFieldMulMasked */
@@ -2272,19 +2272,19 @@ func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
 // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
 // reduction polynomial x^8 + x^4 + x^3 + x + 1.
 //
-// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
 func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, z Mask8x16) Uint8x16
 
 // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
 // reduction polynomial x^8 + x^4 + x^3 + x + 1.
 //
-// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
 func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, z Mask8x32) Uint8x32
 
 // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
 // reduction polynomial x^8 + x^4 + x^3 + x + 1.
 //
-// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX
+// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, z Mask8x64) Uint8x64
 
 /* Get128 */
@@ -2365,14 +2365,14 @@ func (x Uint64x4) Get128(index uint8) Uint64x2
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPEXTRB, CPU Feature: AVX512EVEX
+// Asm: VPEXTRB, CPU Feature: AVX512BW
 func (x Int8x16) GetElem(index uint8) int8
 
 // GetElem retrieves a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPEXTRW, CPU Feature: AVX512EVEX
+// Asm: VPEXTRW, CPU Feature: AVX512BW
 func (x Int16x8) GetElem(index uint8) int16
 
 // GetElem retrieves a single constant-indexed element's value.
@@ -2393,14 +2393,14 @@ func (x Int64x2) GetElem(index uint8) int64
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPEXTRB, CPU Feature: AVX512EVEX
+// Asm: VPEXTRB, CPU Feature: AVX512BW
 func (x Uint8x16) GetElem(index uint8) uint8
 
 // GetElem retrieves a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPEXTRW, CPU Feature: AVX512EVEX
+// Asm: VPEXTRW, CPU Feature: AVX512BW
 func (x Uint16x8) GetElem(index uint8) uint16
 
 // GetElem retrieves a single constant-indexed element's value.
@@ -2471,7 +2471,7 @@ func (x Float32x8) Greater(y Float32x8) Mask32x8
 
 // Greater compares for greater than.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) Greater(y Float32x16) Mask32x16
 
 // Greater compares for greater than.
@@ -2486,87 +2486,87 @@ func (x Float64x4) Greater(y Float64x4) Mask64x4
 
 // Greater compares for greater than.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) Greater(y Float64x8) Mask64x8
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) Greater(y Int8x64) Mask8x64
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) Greater(y Int16x32) Mask16x32
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) Greater(y Int32x16) Mask32x16
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) Greater(y Int64x8) Mask64x8
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) Greater(y Uint8x16) Mask8x16
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) Greater(y Uint8x32) Mask8x32
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) Greater(y Uint8x64) Mask8x64
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) Greater(y Uint16x8) Mask16x8
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) Greater(y Uint16x16) Mask16x16
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) Greater(y Uint16x32) Mask16x32
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) Greater(y Uint32x4) Mask32x4
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) Greater(y Uint32x8) Mask32x8
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) Greater(y Uint32x16) Mask32x16
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) Greater(y Uint64x2) Mask64x2
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) Greater(y Uint64x4) Mask64x4
 
 // Greater compares for greater than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) Greater(y Uint64x8) Mask64x8
 
 /* GreaterEqual */
@@ -2583,7 +2583,7 @@ func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16
 
 // GreaterEqual compares for greater than or equal.
@@ -2598,431 +2598,431 @@ func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4
 
 // GreaterEqual compares for greater than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
 
 /* GreaterEqualMasked */
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x4) GreaterEqualMasked(y Float32x4, z Mask32x4) Mask32x4
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x8) GreaterEqualMasked(y Float32x8, z Mask32x8) Mask32x8
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) GreaterEqualMasked(y Float32x16, z Mask32x16) Mask32x16
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x2) GreaterEqualMasked(y Float64x2, z Mask64x2) Mask64x2
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x4) GreaterEqualMasked(y Float64x4, z Mask64x4) Mask64x4
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) GreaterEqualMasked(y Float64x8, z Mask64x8) Mask64x8
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x16) GreaterEqualMasked(y Int8x16, z Mask8x16) Mask8x16
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x32) GreaterEqualMasked(y Int8x32, z Mask8x32) Mask8x32
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) GreaterEqualMasked(y Int8x64, z Mask8x64) Mask8x64
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x8) GreaterEqualMasked(y Int16x8, z Mask16x8) Mask16x8
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x16) GreaterEqualMasked(y Int16x16, z Mask16x16) Mask16x16
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) GreaterEqualMasked(y Int16x32, z Mask16x32) Mask16x32
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x4) GreaterEqualMasked(y Int32x4, z Mask32x4) Mask32x4
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x8) GreaterEqualMasked(y Int32x8, z Mask32x8) Mask32x8
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) GreaterEqualMasked(y Int32x16, z Mask32x16) Mask32x16
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x2) GreaterEqualMasked(y Int64x2, z Mask64x2) Mask64x2
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x4) GreaterEqualMasked(y Int64x4, z Mask64x4) Mask64x4
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) GreaterEqualMasked(y Int64x8, z Mask64x8) Mask64x8
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) GreaterEqualMasked(y Uint8x16, z Mask8x16) Mask8x16
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) GreaterEqualMasked(y Uint8x32, z Mask8x32) Mask8x32
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) GreaterEqualMasked(y Uint8x64, z Mask8x64) Mask8x64
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) GreaterEqualMasked(y Uint16x8, z Mask16x8) Mask16x8
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) GreaterEqualMasked(y Uint16x16, z Mask16x16) Mask16x16
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) GreaterEqualMasked(y Uint16x32, z Mask16x32) Mask16x32
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) GreaterEqualMasked(y Uint32x4, z Mask32x4) Mask32x4
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) GreaterEqualMasked(y Uint32x8, z Mask32x8) Mask32x8
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) GreaterEqualMasked(y Uint32x16, z Mask32x16) Mask32x16
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) GreaterEqualMasked(y Uint64x2, z Mask64x2) Mask64x2
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) GreaterEqualMasked(y Uint64x4, z Mask64x4) Mask64x4
 
 // GreaterEqualMasked compares for greater than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) GreaterEqualMasked(y Uint64x8, z Mask64x8) Mask64x8
 
 /* GreaterMasked */
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x4) GreaterMasked(y Float32x4, z Mask32x4) Mask32x4
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x8) GreaterMasked(y Float32x8, z Mask32x8) Mask32x8
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) GreaterMasked(y Float32x16, z Mask32x16) Mask32x16
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x2) GreaterMasked(y Float64x2, z Mask64x2) Mask64x2
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x4) GreaterMasked(y Float64x4, z Mask64x4) Mask64x4
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) GreaterMasked(y Float64x8, z Mask64x8) Mask64x8
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x16) GreaterMasked(y Int8x16, z Mask8x16) Mask8x16
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x32) GreaterMasked(y Int8x32, z Mask8x32) Mask8x32
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) GreaterMasked(y Int8x64, z Mask8x64) Mask8x64
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x8) GreaterMasked(y Int16x8, z Mask16x8) Mask16x8
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x16) GreaterMasked(y Int16x16, z Mask16x16) Mask16x16
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) GreaterMasked(y Int16x32, z Mask16x32) Mask16x32
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x4) GreaterMasked(y Int32x4, z Mask32x4) Mask32x4
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x8) GreaterMasked(y Int32x8, z Mask32x8) Mask32x8
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) GreaterMasked(y Int32x16, z Mask32x16) Mask32x16
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x2) GreaterMasked(y Int64x2, z Mask64x2) Mask64x2
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x4) GreaterMasked(y Int64x4, z Mask64x4) Mask64x4
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) GreaterMasked(y Int64x8, z Mask64x8) Mask64x8
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) GreaterMasked(y Uint8x16, z Mask8x16) Mask8x16
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) GreaterMasked(y Uint8x32, z Mask8x32) Mask8x32
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) GreaterMasked(y Uint8x64, z Mask8x64) Mask8x64
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) GreaterMasked(y Uint16x8, z Mask16x8) Mask16x8
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) GreaterMasked(y Uint16x16, z Mask16x16) Mask16x16
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) GreaterMasked(y Uint16x32, z Mask16x32) Mask16x32
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) GreaterMasked(y Uint32x4, z Mask32x4) Mask32x4
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) GreaterMasked(y Uint32x8, z Mask32x8) Mask32x8
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) GreaterMasked(y Uint32x16, z Mask32x16) Mask32x16
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) GreaterMasked(y Uint64x2, z Mask64x2) Mask64x2
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) GreaterMasked(y Uint64x4, z Mask64x4) Mask64x4
 
 // GreaterMasked compares for greater than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) GreaterMasked(y Uint64x8, z Mask64x8) Mask64x8
 
 /* IsNan */
@@ -3039,7 +3039,7 @@ func (x Float32x8) IsNan(y Float32x8) Mask32x8
 
 // IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) IsNan(y Float32x16) Mask32x16
 
 // IsNan checks if elements are NaN. Use as x.IsNan(x).
@@ -3054,39 +3054,39 @@ func (x Float64x4) IsNan(y Float64x4) Mask64x4
 
 // IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) IsNan(y Float64x8) Mask64x8
 
 /* IsNanMasked */
 
 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x4) IsNanMasked(y Float32x4, z Mask32x4) Mask32x4
 
 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x8) IsNanMasked(y Float32x8, z Mask32x8) Mask32x8
 
 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) IsNanMasked(y Float32x16, z Mask32x16) Mask32x16
 
 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x2) IsNanMasked(y Float64x2, z Mask64x2) Mask64x2
 
 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x4) IsNanMasked(y Float64x4, z Mask64x4) Mask64x4
 
 // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) IsNanMasked(y Float64x8, z Mask64x8) Mask64x8
 
 /* Less */
@@ -3103,7 +3103,7 @@ func (x Float32x8) Less(y Float32x8) Mask32x8
 
 // Less compares for less than.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) Less(y Float32x16) Mask32x16
 
 // Less compares for less than.
@@ -3118,127 +3118,127 @@ func (x Float64x4) Less(y Float64x4) Mask64x4
 
 // Less compares for less than.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) Less(y Float64x8) Mask64x8
 
 // Less compares for less than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x16) Less(y Int8x16) Mask8x16
 
 // Less compares for less than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x32) Less(y Int8x32) Mask8x32
 
 // Less compares for less than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) Less(y Int8x64) Mask8x64
 
 // Less compares for less than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x8) Less(y Int16x8) Mask16x8
 
 // Less compares for less than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x16) Less(y Int16x16) Mask16x16
 
 // Less compares for less than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) Less(y Int16x32) Mask16x32
 
 // Less compares for less than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x4) Less(y Int32x4) Mask32x4
 
 // Less compares for less than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x8) Less(y Int32x8) Mask32x8
 
 // Less compares for less than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) Less(y Int32x16) Mask32x16
 
 // Less compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x2) Less(y Int64x2) Mask64x2
 
 // Less compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x4) Less(y Int64x4) Mask64x4
 
 // Less compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) Less(y Int64x8) Mask64x8
 
 // Less compares for less than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) Less(y Uint8x16) Mask8x16
 
 // Less compares for less than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) Less(y Uint8x32) Mask8x32
 
 // Less compares for less than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) Less(y Uint8x64) Mask8x64
 
 // Less compares for less than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) Less(y Uint16x8) Mask16x8
 
 // Less compares for less than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) Less(y Uint16x16) Mask16x16
 
 // Less compares for less than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) Less(y Uint16x32) Mask16x32
 
 // Less compares for less than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) Less(y Uint32x4) Mask32x4
 
 // Less compares for less than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) Less(y Uint32x8) Mask32x8
 
 // Less compares for less than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) Less(y Uint32x16) Mask32x16
 
 // Less compares for less than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) Less(y Uint64x2) Mask64x2
 
 // Less compares for less than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) Less(y Uint64x4) Mask64x4
 
 // Less compares for less than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) Less(y Uint64x8) Mask64x8
 
 /* LessEqual */
@@ -3255,7 +3255,7 @@ func (x Float32x8) LessEqual(y Float32x8) Mask32x8
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) LessEqual(y Float32x16) Mask32x16
 
 // LessEqual compares for less than or equal.
@@ -3270,431 +3270,431 @@ func (x Float64x4) LessEqual(y Float64x4) Mask64x4
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) LessEqual(y Float64x8) Mask64x8
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x16) LessEqual(y Int8x16) Mask8x16
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x32) LessEqual(y Int8x32) Mask8x32
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) LessEqual(y Int8x64) Mask8x64
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x8) LessEqual(y Int16x8) Mask16x8
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x16) LessEqual(y Int16x16) Mask16x16
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) LessEqual(y Int16x32) Mask16x32
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x4) LessEqual(y Int32x4) Mask32x4
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x8) LessEqual(y Int32x8) Mask32x8
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) LessEqual(y Int32x16) Mask32x16
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x2) LessEqual(y Int64x2) Mask64x2
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x4) LessEqual(y Int64x4) Mask64x4
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) LessEqual(y Int64x8) Mask64x8
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4
 
 // LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
 
 /* LessEqualMasked */
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x4) LessEqualMasked(y Float32x4, z Mask32x4) Mask32x4
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x8) LessEqualMasked(y Float32x8, z Mask32x8) Mask32x8
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) LessEqualMasked(y Float32x16, z Mask32x16) Mask32x16
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x2) LessEqualMasked(y Float64x2, z Mask64x2) Mask64x2
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x4) LessEqualMasked(y Float64x4, z Mask64x4) Mask64x4
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) LessEqualMasked(y Float64x8, z Mask64x8) Mask64x8
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x16) LessEqualMasked(y Int8x16, z Mask8x16) Mask8x16
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x32) LessEqualMasked(y Int8x32, z Mask8x32) Mask8x32
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) LessEqualMasked(y Int8x64, z Mask8x64) Mask8x64
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x8) LessEqualMasked(y Int16x8, z Mask16x8) Mask16x8
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x16) LessEqualMasked(y Int16x16, z Mask16x16) Mask16x16
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) LessEqualMasked(y Int16x32, z Mask16x32) Mask16x32
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x4) LessEqualMasked(y Int32x4, z Mask32x4) Mask32x4
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x8) LessEqualMasked(y Int32x8, z Mask32x8) Mask32x8
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) LessEqualMasked(y Int32x16, z Mask32x16) Mask32x16
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x2) LessEqualMasked(y Int64x2, z Mask64x2) Mask64x2
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x4) LessEqualMasked(y Int64x4, z Mask64x4) Mask64x4
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) LessEqualMasked(y Int64x8, z Mask64x8) Mask64x8
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) LessEqualMasked(y Uint8x16, z Mask8x16) Mask8x16
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) LessEqualMasked(y Uint8x32, z Mask8x32) Mask8x32
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) LessEqualMasked(y Uint8x64, z Mask8x64) Mask8x64
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) LessEqualMasked(y Uint16x8, z Mask16x8) Mask16x8
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) LessEqualMasked(y Uint16x16, z Mask16x16) Mask16x16
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) LessEqualMasked(y Uint16x32, z Mask16x32) Mask16x32
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) LessEqualMasked(y Uint32x4, z Mask32x4) Mask32x4
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) LessEqualMasked(y Uint32x8, z Mask32x8) Mask32x8
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) LessEqualMasked(y Uint32x16, z Mask32x16) Mask32x16
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) LessEqualMasked(y Uint64x2, z Mask64x2) Mask64x2
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) LessEqualMasked(y Uint64x4, z Mask64x4) Mask64x4
 
 // LessEqualMasked compares for less than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) LessEqualMasked(y Uint64x8, z Mask64x8) Mask64x8
 
 /* LessMasked */
 
 // LessMasked compares for less than.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x4) LessMasked(y Float32x4, z Mask32x4) Mask32x4
 
 // LessMasked compares for less than.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x8) LessMasked(y Float32x8, z Mask32x8) Mask32x8
 
 // LessMasked compares for less than.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) LessMasked(y Float32x16, z Mask32x16) Mask32x16
 
 // LessMasked compares for less than.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x2) LessMasked(y Float64x2, z Mask64x2) Mask64x2
 
 // LessMasked compares for less than.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x4) LessMasked(y Float64x4, z Mask64x4) Mask64x4
 
 // LessMasked compares for less than.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) LessMasked(y Float64x8, z Mask64x8) Mask64x8
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x16) LessMasked(y Int8x16, z Mask8x16) Mask8x16
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x32) LessMasked(y Int8x32, z Mask8x32) Mask8x32
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) LessMasked(y Int8x64, z Mask8x64) Mask8x64
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x8) LessMasked(y Int16x8, z Mask16x8) Mask16x8
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x16) LessMasked(y Int16x16, z Mask16x16) Mask16x16
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) LessMasked(y Int16x32, z Mask16x32) Mask16x32
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x4) LessMasked(y Int32x4, z Mask32x4) Mask32x4
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x8) LessMasked(y Int32x8, z Mask32x8) Mask32x8
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) LessMasked(y Int32x16, z Mask32x16) Mask32x16
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x2) LessMasked(y Int64x2, z Mask64x2) Mask64x2
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x4) LessMasked(y Int64x4, z Mask64x4) Mask64x4
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) LessMasked(y Int64x8, z Mask64x8) Mask64x8
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) LessMasked(y Uint8x16, z Mask8x16) Mask8x16
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) LessMasked(y Uint8x32, z Mask8x32) Mask8x32
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) LessMasked(y Uint8x64, z Mask8x64) Mask8x64
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) LessMasked(y Uint16x8, z Mask16x8) Mask16x8
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) LessMasked(y Uint16x16, z Mask16x16) Mask16x16
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) LessMasked(y Uint16x32, z Mask16x32) Mask16x32
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) LessMasked(y Uint32x4, z Mask32x4) Mask32x4
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) LessMasked(y Uint32x8, z Mask32x8) Mask32x8
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) LessMasked(y Uint32x16, z Mask32x16) Mask32x16
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) LessMasked(y Uint64x2, z Mask64x2) Mask64x2
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) LessMasked(y Uint64x4, z Mask64x4) Mask64x4
 
 // LessMasked compares for less than.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) LessMasked(y Uint64x8, z Mask64x8) Mask64x8
 
 /* Max */
@@ -3711,7 +3711,7 @@ func (x Float32x8) Max(y Float32x8) Float32x8
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VMAXPS, CPU Feature: AVX512EVEX
+// Asm: VMAXPS, CPU Feature: AVX512F
 func (x Float32x16) Max(y Float32x16) Float32x16
 
 // Max computes the maximum of corresponding elements.
@@ -3726,7 +3726,7 @@ func (x Float64x4) Max(y Float64x4) Float64x4
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VMAXPD, CPU Feature: AVX512EVEX
+// Asm: VMAXPD, CPU Feature: AVX512F
 func (x Float64x8) Max(y Float64x8) Float64x8
 
 // Max computes the maximum of corresponding elements.
@@ -3741,7 +3741,7 @@ func (x Int8x32) Max(y Int8x32) Int8x32
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSB, CPU Feature: AVX512EVEX
+// Asm: VPMAXSB, CPU Feature: AVX512BW
 func (x Int8x64) Max(y Int8x64) Int8x64
 
 // Max computes the maximum of corresponding elements.
@@ -3756,7 +3756,7 @@ func (x Int16x16) Max(y Int16x16) Int16x16
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
+// Asm: VPMAXSW, CPU Feature: AVX512BW
 func (x Int16x32) Max(y Int16x32) Int16x32
 
 // Max computes the maximum of corresponding elements.
@@ -3771,22 +3771,22 @@ func (x Int32x8) Max(y Int32x8) Int32x8
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
+// Asm: VPMAXSD, CPU Feature: AVX512F
 func (x Int32x16) Max(y Int32x16) Int32x16
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXSQ, CPU Feature: AVX512F
 func (x Int64x2) Max(y Int64x2) Int64x2
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXSQ, CPU Feature: AVX512F
 func (x Int64x4) Max(y Int64x4) Int64x4
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXSQ, CPU Feature: AVX512F
 func (x Int64x8) Max(y Int64x8) Int64x8
 
 // Max computes the maximum of corresponding elements.
@@ -3801,7 +3801,7 @@ func (x Uint8x32) Max(y Uint8x32) Uint8x32
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUB, CPU Feature: AVX512EVEX
+// Asm: VPMAXUB, CPU Feature: AVX512BW
 func (x Uint8x64) Max(y Uint8x64) Uint8x64
 
 // Max computes the maximum of corresponding elements.
@@ -3816,7 +3816,7 @@ func (x Uint16x16) Max(y Uint16x16) Uint16x16
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUW, CPU Feature: AVX512EVEX
+// Asm: VPMAXUW, CPU Feature: AVX512BW
 func (x Uint16x32) Max(y Uint16x32) Uint16x32
 
 // Max computes the maximum of corresponding elements.
@@ -3831,174 +3831,174 @@ func (x Uint32x8) Max(y Uint32x8) Uint32x8
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUD, CPU Feature: AVX512EVEX
+// Asm: VPMAXUD, CPU Feature: AVX512F
 func (x Uint32x16) Max(y Uint32x16) Uint32x16
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXUQ, CPU Feature: AVX512F
 func (x Uint64x2) Max(y Uint64x2) Uint64x2
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXUQ, CPU Feature: AVX512F
 func (x Uint64x4) Max(y Uint64x4) Uint64x4
 
 // Max computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXUQ, CPU Feature: AVX512F
 func (x Uint64x8) Max(y Uint64x8) Uint64x8
 
 /* MaxMasked */
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VMAXPS, CPU Feature: AVX512EVEX
+// Asm: VMAXPS, CPU Feature: AVX512F
 func (x Float32x4) MaxMasked(y Float32x4, z Mask32x4) Float32x4
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VMAXPS, CPU Feature: AVX512EVEX
+// Asm: VMAXPS, CPU Feature: AVX512F
 func (x Float32x8) MaxMasked(y Float32x8, z Mask32x8) Float32x8
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VMAXPS, CPU Feature: AVX512EVEX
+// Asm: VMAXPS, CPU Feature: AVX512F
 func (x Float32x16) MaxMasked(y Float32x16, z Mask32x16) Float32x16
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VMAXPD, CPU Feature: AVX512EVEX
+// Asm: VMAXPD, CPU Feature: AVX512F
 func (x Float64x2) MaxMasked(y Float64x2, z Mask64x2) Float64x2
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VMAXPD, CPU Feature: AVX512EVEX
+// Asm: VMAXPD, CPU Feature: AVX512F
 func (x Float64x4) MaxMasked(y Float64x4, z Mask64x4) Float64x4
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VMAXPD, CPU Feature: AVX512EVEX
+// Asm: VMAXPD, CPU Feature: AVX512F
 func (x Float64x8) MaxMasked(y Float64x8, z Mask64x8) Float64x8
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSB, CPU Feature: AVX512EVEX
+// Asm: VPMAXSB, CPU Feature: AVX512BW
 func (x Int8x16) MaxMasked(y Int8x16, z Mask8x16) Int8x16
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSB, CPU Feature: AVX512EVEX
+// Asm: VPMAXSB, CPU Feature: AVX512BW
 func (x Int8x32) MaxMasked(y Int8x32, z Mask8x32) Int8x32
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSB, CPU Feature: AVX512EVEX
+// Asm: VPMAXSB, CPU Feature: AVX512BW
 func (x Int8x64) MaxMasked(y Int8x64, z Mask8x64) Int8x64
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
+// Asm: VPMAXSW, CPU Feature: AVX512BW
 func (x Int16x8) MaxMasked(y Int16x8, z Mask16x8) Int16x8
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
+// Asm: VPMAXSW, CPU Feature: AVX512BW
 func (x Int16x16) MaxMasked(y Int16x16, z Mask16x16) Int16x16
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
+// Asm: VPMAXSW, CPU Feature: AVX512BW
 func (x Int16x32) MaxMasked(y Int16x32, z Mask16x32) Int16x32
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
+// Asm: VPMAXSD, CPU Feature: AVX512F
 func (x Int32x4) MaxMasked(y Int32x4, z Mask32x4) Int32x4
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
+// Asm: VPMAXSD, CPU Feature: AVX512F
 func (x Int32x8) MaxMasked(y Int32x8, z Mask32x8) Int32x8
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
+// Asm: VPMAXSD, CPU Feature: AVX512F
 func (x Int32x16) MaxMasked(y Int32x16, z Mask32x16) Int32x16
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXSQ, CPU Feature: AVX512F
 func (x Int64x2) MaxMasked(y Int64x2, z Mask64x2) Int64x2
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXSQ, CPU Feature: AVX512F
 func (x Int64x4) MaxMasked(y Int64x4, z Mask64x4) Int64x4
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXSQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXSQ, CPU Feature: AVX512F
 func (x Int64x8) MaxMasked(y Int64x8, z Mask64x8) Int64x8
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUB, CPU Feature: AVX512EVEX
+// Asm: VPMAXUB, CPU Feature: AVX512BW
 func (x Uint8x16) MaxMasked(y Uint8x16, z Mask8x16) Uint8x16
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUB, CPU Feature: AVX512EVEX
+// Asm: VPMAXUB, CPU Feature: AVX512BW
 func (x Uint8x32) MaxMasked(y Uint8x32, z Mask8x32) Uint8x32
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUB, CPU Feature: AVX512EVEX
+// Asm: VPMAXUB, CPU Feature: AVX512BW
 func (x Uint8x64) MaxMasked(y Uint8x64, z Mask8x64) Uint8x64
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUW, CPU Feature: AVX512EVEX
+// Asm: VPMAXUW, CPU Feature: AVX512BW
 func (x Uint16x8) MaxMasked(y Uint16x8, z Mask16x8) Uint16x8
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUW, CPU Feature: AVX512EVEX
+// Asm: VPMAXUW, CPU Feature: AVX512BW
 func (x Uint16x16) MaxMasked(y Uint16x16, z Mask16x16) Uint16x16
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUW, CPU Feature: AVX512EVEX
+// Asm: VPMAXUW, CPU Feature: AVX512BW
 func (x Uint16x32) MaxMasked(y Uint16x32, z Mask16x32) Uint16x32
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUD, CPU Feature: AVX512EVEX
+// Asm: VPMAXUD, CPU Feature: AVX512F
 func (x Uint32x4) MaxMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUD, CPU Feature: AVX512EVEX
+// Asm: VPMAXUD, CPU Feature: AVX512F
 func (x Uint32x8) MaxMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUD, CPU Feature: AVX512EVEX
+// Asm: VPMAXUD, CPU Feature: AVX512F
 func (x Uint32x16) MaxMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXUQ, CPU Feature: AVX512F
 func (x Uint64x2) MaxMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXUQ, CPU Feature: AVX512F
 func (x Uint64x4) MaxMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // MaxMasked computes the maximum of corresponding elements.
 //
-// Asm: VPMAXUQ, CPU Feature: AVX512EVEX
+// Asm: VPMAXUQ, CPU Feature: AVX512F
 func (x Uint64x8) MaxMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* Min */
@@ -4015,7 +4015,7 @@ func (x Float32x8) Min(y Float32x8) Float32x8
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VMINPS, CPU Feature: AVX512EVEX
+// Asm: VMINPS, CPU Feature: AVX512F
 func (x Float32x16) Min(y Float32x16) Float32x16
 
 // Min computes the minimum of corresponding elements.
@@ -4030,7 +4030,7 @@ func (x Float64x4) Min(y Float64x4) Float64x4
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VMINPD, CPU Feature: AVX512EVEX
+// Asm: VMINPD, CPU Feature: AVX512F
 func (x Float64x8) Min(y Float64x8) Float64x8
 
 // Min computes the minimum of corresponding elements.
@@ -4045,7 +4045,7 @@ func (x Int8x32) Min(y Int8x32) Int8x32
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINSB, CPU Feature: AVX512EVEX
+// Asm: VPMINSB, CPU Feature: AVX512BW
 func (x Int8x64) Min(y Int8x64) Int8x64
 
 // Min computes the minimum of corresponding elements.
@@ -4060,7 +4060,7 @@ func (x Int16x16) Min(y Int16x16) Int16x16
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
+// Asm: VPMINSW, CPU Feature: AVX512BW
 func (x Int16x32) Min(y Int16x32) Int16x32
 
 // Min computes the minimum of corresponding elements.
@@ -4075,22 +4075,22 @@ func (x Int32x8) Min(y Int32x8) Int32x8
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
+// Asm: VPMINSD, CPU Feature: AVX512F
 func (x Int32x16) Min(y Int32x16) Int32x16
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+// Asm: VPMINSQ, CPU Feature: AVX512F
 func (x Int64x2) Min(y Int64x2) Int64x2
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+// Asm: VPMINSQ, CPU Feature: AVX512F
 func (x Int64x4) Min(y Int64x4) Int64x4
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+// Asm: VPMINSQ, CPU Feature: AVX512F
 func (x Int64x8) Min(y Int64x8) Int64x8
 
 // Min computes the minimum of corresponding elements.
@@ -4105,7 +4105,7 @@ func (x Uint8x32) Min(y Uint8x32) Uint8x32
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINUB, CPU Feature: AVX512EVEX
+// Asm: VPMINUB, CPU Feature: AVX512BW
 func (x Uint8x64) Min(y Uint8x64) Uint8x64
 
 // Min computes the minimum of corresponding elements.
@@ -4120,7 +4120,7 @@ func (x Uint16x16) Min(y Uint16x16) Uint16x16
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINUW, CPU Feature: AVX512EVEX
+// Asm: VPMINUW, CPU Feature: AVX512BW
 func (x Uint16x32) Min(y Uint16x32) Uint16x32
 
 // Min computes the minimum of corresponding elements.
@@ -4135,174 +4135,174 @@ func (x Uint32x8) Min(y Uint32x8) Uint32x8
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINUD, CPU Feature: AVX512EVEX
+// Asm: VPMINUD, CPU Feature: AVX512F
 func (x Uint32x16) Min(y Uint32x16) Uint32x16
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+// Asm: VPMINUQ, CPU Feature: AVX512F
 func (x Uint64x2) Min(y Uint64x2) Uint64x2
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+// Asm: VPMINUQ, CPU Feature: AVX512F
 func (x Uint64x4) Min(y Uint64x4) Uint64x4
 
 // Min computes the minimum of corresponding elements.
 //
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+// Asm: VPMINUQ, CPU Feature: AVX512F
 func (x Uint64x8) Min(y Uint64x8) Uint64x8
 
 /* MinMasked */
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VMINPS, CPU Feature: AVX512EVEX
+// Asm: VMINPS, CPU Feature: AVX512F
 func (x Float32x4) MinMasked(y Float32x4, z Mask32x4) Float32x4
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VMINPS, CPU Feature: AVX512EVEX
+// Asm: VMINPS, CPU Feature: AVX512F
 func (x Float32x8) MinMasked(y Float32x8, z Mask32x8) Float32x8
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VMINPS, CPU Feature: AVX512EVEX
+// Asm: VMINPS, CPU Feature: AVX512F
 func (x Float32x16) MinMasked(y Float32x16, z Mask32x16) Float32x16
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VMINPD, CPU Feature: AVX512EVEX
+// Asm: VMINPD, CPU Feature: AVX512F
 func (x Float64x2) MinMasked(y Float64x2, z Mask64x2) Float64x2
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VMINPD, CPU Feature: AVX512EVEX
+// Asm: VMINPD, CPU Feature: AVX512F
 func (x Float64x4) MinMasked(y Float64x4, z Mask64x4) Float64x4
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VMINPD, CPU Feature: AVX512EVEX
+// Asm: VMINPD, CPU Feature: AVX512F
 func (x Float64x8) MinMasked(y Float64x8, z Mask64x8) Float64x8
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSB, CPU Feature: AVX512EVEX
+// Asm: VPMINSB, CPU Feature: AVX512BW
 func (x Int8x16) MinMasked(y Int8x16, z Mask8x16) Int8x16
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSB, CPU Feature: AVX512EVEX
+// Asm: VPMINSB, CPU Feature: AVX512BW
 func (x Int8x32) MinMasked(y Int8x32, z Mask8x32) Int8x32
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSB, CPU Feature: AVX512EVEX
+// Asm: VPMINSB, CPU Feature: AVX512BW
 func (x Int8x64) MinMasked(y Int8x64, z Mask8x64) Int8x64
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
+// Asm: VPMINSW, CPU Feature: AVX512BW
 func (x Int16x8) MinMasked(y Int16x8, z Mask16x8) Int16x8
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
+// Asm: VPMINSW, CPU Feature: AVX512BW
 func (x Int16x16) MinMasked(y Int16x16, z Mask16x16) Int16x16
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
+// Asm: VPMINSW, CPU Feature: AVX512BW
 func (x Int16x32) MinMasked(y Int16x32, z Mask16x32) Int16x32
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
+// Asm: VPMINSD, CPU Feature: AVX512F
 func (x Int32x4) MinMasked(y Int32x4, z Mask32x4) Int32x4
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
+// Asm: VPMINSD, CPU Feature: AVX512F
 func (x Int32x8) MinMasked(y Int32x8, z Mask32x8) Int32x8
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
+// Asm: VPMINSD, CPU Feature: AVX512F
 func (x Int32x16) MinMasked(y Int32x16, z Mask32x16) Int32x16
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+// Asm: VPMINSQ, CPU Feature: AVX512F
 func (x Int64x2) MinMasked(y Int64x2, z Mask64x2) Int64x2
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+// Asm: VPMINSQ, CPU Feature: AVX512F
 func (x Int64x4) MinMasked(y Int64x4, z Mask64x4) Int64x4
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+// Asm: VPMINSQ, CPU Feature: AVX512F
 func (x Int64x8) MinMasked(y Int64x8, z Mask64x8) Int64x8
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUB, CPU Feature: AVX512EVEX
+// Asm: VPMINUB, CPU Feature: AVX512BW
 func (x Uint8x16) MinMasked(y Uint8x16, z Mask8x16) Uint8x16
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUB, CPU Feature: AVX512EVEX
+// Asm: VPMINUB, CPU Feature: AVX512BW
 func (x Uint8x32) MinMasked(y Uint8x32, z Mask8x32) Uint8x32
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUB, CPU Feature: AVX512EVEX
+// Asm: VPMINUB, CPU Feature: AVX512BW
 func (x Uint8x64) MinMasked(y Uint8x64, z Mask8x64) Uint8x64
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUW, CPU Feature: AVX512EVEX
+// Asm: VPMINUW, CPU Feature: AVX512BW
 func (x Uint16x8) MinMasked(y Uint16x8, z Mask16x8) Uint16x8
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUW, CPU Feature: AVX512EVEX
+// Asm: VPMINUW, CPU Feature: AVX512BW
 func (x Uint16x16) MinMasked(y Uint16x16, z Mask16x16) Uint16x16
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUW, CPU Feature: AVX512EVEX
+// Asm: VPMINUW, CPU Feature: AVX512BW
 func (x Uint16x32) MinMasked(y Uint16x32, z Mask16x32) Uint16x32
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUD, CPU Feature: AVX512EVEX
+// Asm: VPMINUD, CPU Feature: AVX512F
 func (x Uint32x4) MinMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUD, CPU Feature: AVX512EVEX
+// Asm: VPMINUD, CPU Feature: AVX512F
 func (x Uint32x8) MinMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUD, CPU Feature: AVX512EVEX
+// Asm: VPMINUD, CPU Feature: AVX512F
 func (x Uint32x16) MinMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+// Asm: VPMINUQ, CPU Feature: AVX512F
 func (x Uint64x2) MinMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+// Asm: VPMINUQ, CPU Feature: AVX512F
 func (x Uint64x4) MinMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // MinMasked computes the minimum of corresponding elements.
 //
-// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+// Asm: VPMINUQ, CPU Feature: AVX512F
 func (x Uint64x8) MinMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* Mul */
@@ -4319,7 +4319,7 @@ func (x Float32x8) Mul(y Float32x8) Float32x8
 
-// Mul multiplies corresponding elements of two vectors, masked.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VMULPS, CPU Feature: AVX512EVEX
+// Asm: VMULPS, CPU Feature: AVX512F
 func (x Float32x16) Mul(y Float32x16) Float32x16
 
 // Mul multiplies corresponding elements of two vectors.
@@ -4334,71 +4334,71 @@ func (x Float64x4) Mul(y Float64x4) Float64x4
 
-// Mul multiplies corresponding elements of two vectors, masked.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VMULPD, CPU Feature: AVX512EVEX
+// Asm: VMULPD, CPU Feature: AVX512F
 func (x Float64x8) Mul(y Float64x8) Float64x8
 
 /* MulByPowOf2 */
 
 // MulByPowOf2 multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPS, CPU Feature: AVX512F
 func (x Float32x4) MulByPowOf2(y Float32x4) Float32x4
 
 // MulByPowOf2 multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPS, CPU Feature: AVX512F
 func (x Float32x8) MulByPowOf2(y Float32x8) Float32x8
 
 // MulByPowOf2 multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPS, CPU Feature: AVX512F
 func (x Float32x16) MulByPowOf2(y Float32x16) Float32x16
 
 // MulByPowOf2 multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPD, CPU Feature: AVX512F
 func (x Float64x2) MulByPowOf2(y Float64x2) Float64x2
 
 // MulByPowOf2 multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPD, CPU Feature: AVX512F
 func (x Float64x4) MulByPowOf2(y Float64x4) Float64x4
 
 // MulByPowOf2 multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPD, CPU Feature: AVX512F
 func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8
 
 /* MulByPowOf2Masked */
 
 // MulByPowOf2Masked multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPS, CPU Feature: AVX512F
 func (x Float32x4) MulByPowOf2Masked(y Float32x4, z Mask32x4) Float32x4
 
 // MulByPowOf2Masked multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPS, CPU Feature: AVX512F
 func (x Float32x8) MulByPowOf2Masked(y Float32x8, z Mask32x8) Float32x8
 
 // MulByPowOf2Masked multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPS, CPU Feature: AVX512F
 func (x Float32x16) MulByPowOf2Masked(y Float32x16, z Mask32x16) Float32x16
 
 // MulByPowOf2Masked multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPD, CPU Feature: AVX512F
 func (x Float64x2) MulByPowOf2Masked(y Float64x2, z Mask64x2) Float64x2
 
 // MulByPowOf2Masked multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPD, CPU Feature: AVX512F
 func (x Float64x4) MulByPowOf2Masked(y Float64x4, z Mask64x4) Float64x4
 
 // MulByPowOf2Masked multiplies elements by a power of 2.
 //
-// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+// Asm: VSCALEFPD, CPU Feature: AVX512F
 func (x Float64x8) MulByPowOf2Masked(y Float64x8, z Mask64x8) Float64x8
 
 /* MulEvenWiden */
@@ -4418,19 +4418,19 @@ func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULDQ, CPU Feature: AVX512F
 func (x Int64x2) MulEvenWiden(y Int64x2) Int64x2
 
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULDQ, CPU Feature: AVX512F
 func (x Int64x4) MulEvenWiden(y Int64x4) Int64x4
 
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULDQ, CPU Feature: AVX512F
 func (x Int64x8) MulEvenWiden(y Int64x8) Int64x8
 
 // MulEvenWiden multiplies even-indexed elements, widening the result.
@@ -4448,19 +4448,19 @@ func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULUDQ, CPU Feature: AVX512F
 func (x Uint64x2) MulEvenWiden(y Uint64x2) Uint64x2
 
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULUDQ, CPU Feature: AVX512F
 func (x Uint64x4) MulEvenWiden(y Uint64x4) Uint64x4
 
-// MulEvenWiden multiplies even-indexed elements, widening the result, masked.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULUDQ, CPU Feature: AVX512F
 func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8
 
 /* MulEvenWidenMasked */
@@ -4468,37 +4468,37 @@ func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8
 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULDQ, CPU Feature: AVX512F
 func (x Int64x2) MulEvenWidenMasked(y Int64x2, z Mask64x2) Int64x2
 
 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULDQ, CPU Feature: AVX512F
 func (x Int64x4) MulEvenWidenMasked(y Int64x4, z Mask64x4) Int64x4
 
 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULDQ, CPU Feature: AVX512F
 func (x Int64x8) MulEvenWidenMasked(y Int64x8, z Mask64x8) Int64x8
 
 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULUDQ, CPU Feature: AVX512F
 func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULUDQ, CPU Feature: AVX512F
 func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked.
 // Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+// Asm: VPMULUDQ, CPU Feature: AVX512F
 func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* MulHigh */
@@ -4515,7 +4515,7 @@ func (x Int16x16) MulHigh(y Int16x16) Int16x16
 
-// MulHigh multiplies elements and stores the high part of the result, masked.
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
+// Asm: VPMULHW, CPU Feature: AVX512BW
 func (x Int16x32) MulHigh(y Int16x32) Int16x32
 
 // MulHigh multiplies elements and stores the high part of the result.
@@ -4530,39 +4530,39 @@ func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16
 
-// MulHigh multiplies elements and stores the high part of the result, masked.
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// Asm: VPMULHUW, CPU Feature: AVX512EVEX
+// Asm: VPMULHUW, CPU Feature: AVX512BW
 func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
 
 /* MulHighMasked */
 
 // MulHighMasked multiplies elements and stores the high part of the result, masked.
 //
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
+// Asm: VPMULHW, CPU Feature: AVX512BW
 func (x Int16x8) MulHighMasked(y Int16x8, z Mask16x8) Int16x8
 
 // MulHighMasked multiplies elements and stores the high part of the result, masked.
 //
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
+// Asm: VPMULHW, CPU Feature: AVX512BW
 func (x Int16x16) MulHighMasked(y Int16x16, z Mask16x16) Int16x16
 
 // MulHighMasked multiplies elements and stores the high part of the result, masked.
 //
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
+// Asm: VPMULHW, CPU Feature: AVX512BW
 func (x Int16x32) MulHighMasked(y Int16x32, z Mask16x32) Int16x32
 
 // MulHighMasked multiplies elements and stores the high part of the result, masked.
 //
-// Asm: VPMULHUW, CPU Feature: AVX512EVEX
+// Asm: VPMULHUW, CPU Feature: AVX512BW
 func (x Uint16x8) MulHighMasked(y Uint16x8, z Mask16x8) Uint16x8
 
 // MulHighMasked multiplies elements and stores the high part of the result, masked.
 //
-// Asm: VPMULHUW, CPU Feature: AVX512EVEX
+// Asm: VPMULHUW, CPU Feature: AVX512BW
 func (x Uint16x16) MulHighMasked(y Uint16x16, z Mask16x16) Uint16x16
 
 // MulHighMasked multiplies elements and stores the high part of the result, masked.
 //
-// Asm: VPMULHUW, CPU Feature: AVX512EVEX
+// Asm: VPMULHUW, CPU Feature: AVX512BW
 func (x Uint16x32) MulHighMasked(y Uint16x32, z Mask16x32) Uint16x32
 
 /* MulLow */
@@ -4579,7 +4579,7 @@ func (x Int16x16) MulLow(y Int16x16) Int16x16
 
-// MulLow multiplies elements and stores the low part of the result, masked.
+// MulLow multiplies elements and stores the low part of the result.
 //
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
+// Asm: VPMULLW, CPU Feature: AVX512BW
 func (x Int16x32) MulLow(y Int16x32) Int16x32
 
 // MulLow multiplies elements and stores the low part of the result.
@@ -4594,101 +4594,101 @@ func (x Int32x8) MulLow(y Int32x8) Int32x8
 
-// MulLow multiplies elements and stores the low part of the result, masked.
+// MulLow multiplies elements and stores the low part of the result.
 //
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
+// Asm: VPMULLD, CPU Feature: AVX512F
 func (x Int32x16) MulLow(y Int32x16) Int32x16
 
-// MulLow multiplies elements and stores the low part of the result, masked.
+// MulLow multiplies elements and stores the low part of the result.
 //
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
 func (x Int64x2) MulLow(y Int64x2) Int64x2
 
-// MulLow multiplies elements and stores the low part of the result, masked.
+// MulLow multiplies elements and stores the low part of the result.
 //
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
 func (x Int64x4) MulLow(y Int64x4) Int64x4
 
-// MulLow multiplies elements and stores the low part of the result, masked.
+// MulLow multiplies elements and stores the low part of the result.
 //
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
 func (x Int64x8) MulLow(y Int64x8) Int64x8
 
 /* MulLowMasked */
 
 // MulLowMasked multiplies elements and stores the low part of the result, masked.
 //
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
+// Asm: VPMULLW, CPU Feature: AVX512BW
 func (x Int16x8) MulLowMasked(y Int16x8, z Mask16x8) Int16x8
 
 // MulLowMasked multiplies elements and stores the low part of the result, masked.
 //
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
+// Asm: VPMULLW, CPU Feature: AVX512BW
 func (x Int16x16) MulLowMasked(y Int16x16, z Mask16x16) Int16x16
 
 // MulLowMasked multiplies elements and stores the low part of the result, masked.
 //
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
+// Asm: VPMULLW, CPU Feature: AVX512BW
 func (x Int16x32) MulLowMasked(y Int16x32, z Mask16x32) Int16x32
 
 // MulLowMasked multiplies elements and stores the low part of the result, masked.
 //
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
+// Asm: VPMULLD, CPU Feature: AVX512F
 func (x Int32x4) MulLowMasked(y Int32x4, z Mask32x4) Int32x4
 
 // MulLowMasked multiplies elements and stores the low part of the result, masked.
 //
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
+// Asm: VPMULLD, CPU Feature: AVX512F
 func (x Int32x8) MulLowMasked(y Int32x8, z Mask32x8) Int32x8
 
 // MulLowMasked multiplies elements and stores the low part of the result, masked.
 //
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
+// Asm: VPMULLD, CPU Feature: AVX512F
 func (x Int32x16) MulLowMasked(y Int32x16, z Mask32x16) Int32x16
 
 // MulLowMasked multiplies elements and stores the low part of the result, masked.
 //
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
 func (x Int64x2) MulLowMasked(y Int64x2, z Mask64x2) Int64x2
 
 // MulLowMasked multiplies elements and stores the low part of the result, masked.
 //
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
 func (x Int64x4) MulLowMasked(y Int64x4, z Mask64x4) Int64x4
 
 // MulLowMasked multiplies elements and stores the low part of the result, masked.
 //
-// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
 func (x Int64x8) MulLowMasked(y Int64x8, z Mask64x8) Int64x8
 
 /* MulMasked */
 
 // MulMasked multiplies corresponding elements of two vectors, masked.
 //
-// Asm: VMULPS, CPU Feature: AVX512EVEX
+// Asm: VMULPS, CPU Feature: AVX512F
 func (x Float32x4) MulMasked(y Float32x4, z Mask32x4) Float32x4
 
 // MulMasked multiplies corresponding elements of two vectors, masked.
 //
-// Asm: VMULPS, CPU Feature: AVX512EVEX
+// Asm: VMULPS, CPU Feature: AVX512F
 func (x Float32x8) MulMasked(y Float32x8, z Mask32x8) Float32x8
 
 // MulMasked multiplies corresponding elements of two vectors, masked.
 //
-// Asm: VMULPS, CPU Feature: AVX512EVEX
+// Asm: VMULPS, CPU Feature: AVX512F
 func (x Float32x16) MulMasked(y Float32x16, z Mask32x16) Float32x16
 
 // MulMasked multiplies corresponding elements of two vectors, masked.
 //
-// Asm: VMULPD, CPU Feature: AVX512EVEX
+// Asm: VMULPD, CPU Feature: AVX512F
 func (x Float64x2) MulMasked(y Float64x2, z Mask64x2) Float64x2
 
 // MulMasked multiplies corresponding elements of two vectors, masked.
 //
-// Asm: VMULPD, CPU Feature: AVX512EVEX
+// Asm: VMULPD, CPU Feature: AVX512F
 func (x Float64x4) MulMasked(y Float64x4, z Mask64x4) Float64x4
 
 // MulMasked multiplies corresponding elements of two vectors, masked.
 //
-// Asm: VMULPD, CPU Feature: AVX512EVEX
+// Asm: VMULPD, CPU Feature: AVX512F
 func (x Float64x8) MulMasked(y Float64x8, z Mask64x8) Float64x8
 
 /* NotEqual */
@@ -4705,7 +4705,7 @@ func (x Float32x8) NotEqual(y Float32x8) Mask32x8
 
 // NotEqual compares for inequality.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) NotEqual(y Float32x16) Mask32x16
 
 // NotEqual compares for inequality.
@@ -4720,279 +4720,279 @@ func (x Float64x4) NotEqual(y Float64x4) Mask64x4
 
 // NotEqual compares for inequality.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) NotEqual(y Float64x8) Mask64x8
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x16) NotEqual(y Int8x16) Mask8x16
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x32) NotEqual(y Int8x32) Mask8x32
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) NotEqual(y Int8x64) Mask8x64
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x8) NotEqual(y Int16x8) Mask16x8
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x16) NotEqual(y Int16x16) Mask16x16
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) NotEqual(y Int16x32) Mask16x32
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x4) NotEqual(y Int32x4) Mask32x4
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x8) NotEqual(y Int32x8) Mask32x8
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) NotEqual(y Int32x16) Mask32x16
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x2) NotEqual(y Int64x2) Mask64x2
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x4) NotEqual(y Int64x4) Mask64x4
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) NotEqual(y Int64x8) Mask64x8
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4
 
 // NotEqual compares for inequality.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
 
 /* NotEqualMasked */
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x4) NotEqualMasked(y Float32x4, z Mask32x4) Mask32x4
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x8) NotEqualMasked(y Float32x8, z Mask32x8) Mask32x8
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VCMPPS, CPU Feature: AVX512EVEX
+// Asm: VCMPPS, CPU Feature: AVX512F
 func (x Float32x16) NotEqualMasked(y Float32x16, z Mask32x16) Mask32x16
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x2) NotEqualMasked(y Float64x2, z Mask64x2) Mask64x2
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x4) NotEqualMasked(y Float64x4, z Mask64x4) Mask64x4
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VCMPPD, CPU Feature: AVX512EVEX
+// Asm: VCMPPD, CPU Feature: AVX512F
 func (x Float64x8) NotEqualMasked(y Float64x8, z Mask64x8) Mask64x8
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x16) NotEqualMasked(y Int8x16, z Mask8x16) Mask8x16
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x32) NotEqualMasked(y Int8x32, z Mask8x32) Mask8x32
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512EVEX
+// Asm: VPCMPB, CPU Feature: AVX512BW
 func (x Int8x64) NotEqualMasked(y Int8x64, z Mask8x64) Mask8x64
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x8) NotEqualMasked(y Int16x8, z Mask16x8) Mask16x8
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x16) NotEqualMasked(y Int16x16, z Mask16x16) Mask16x16
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
+// Asm: VPCMPW, CPU Feature: AVX512BW
 func (x Int16x32) NotEqualMasked(y Int16x32, z Mask16x32) Mask16x32
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x4) NotEqualMasked(y Int32x4, z Mask32x4) Mask32x4
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x8) NotEqualMasked(y Int32x8, z Mask32x8) Mask32x8
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
+// Asm: VPCMPD, CPU Feature: AVX512F
 func (x Int32x16) NotEqualMasked(y Int32x16, z Mask32x16) Mask32x16
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x2) NotEqualMasked(y Int64x2, z Mask64x2) Mask64x2
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x4) NotEqualMasked(y Int64x4, z Mask64x4) Mask64x4
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPQ, CPU Feature: AVX512F
 func (x Int64x8) NotEqualMasked(y Int64x8, z Mask64x8) Mask64x8
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x16) NotEqualMasked(y Uint8x16, z Mask8x16) Mask8x16
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x32) NotEqualMasked(y Uint8x32, z Mask8x32) Mask8x32
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+// Asm: VPCMPUB, CPU Feature: AVX512BW
 func (x Uint8x64) NotEqualMasked(y Uint8x64, z Mask8x64) Mask8x64
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x8) NotEqualMasked(y Uint16x8, z Mask16x8) Mask16x8
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x16) NotEqualMasked(y Uint16x16, z Mask16x16) Mask16x16
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+// Asm: VPCMPUW, CPU Feature: AVX512BW
 func (x Uint16x32) NotEqualMasked(y Uint16x32, z Mask16x32) Mask16x32
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x4) NotEqualMasked(y Uint32x4, z Mask32x4) Mask32x4
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x8) NotEqualMasked(y Uint32x8, z Mask32x8) Mask32x8
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+// Asm: VPCMPUD, CPU Feature: AVX512F
 func (x Uint32x16) NotEqualMasked(y Uint32x16, z Mask32x16) Mask32x16
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x2) NotEqualMasked(y Uint64x2, z Mask64x2) Mask64x2
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x4) NotEqualMasked(y Uint64x4, z Mask64x4) Mask64x4
 
 // NotEqualMasked compares for inequality.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+// Asm: VPCMPUQ, CPU Feature: AVX512F
 func (x Uint64x8) NotEqualMasked(y Uint64x8, z Mask64x8) Mask64x8
 
 /* Or */
@@ -5029,7 +5029,7 @@ func (x Int32x8) Or(y Int32x8) Int32x8
 
-// Or performs a masked bitwise OR operation between two vectors.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPORD, CPU Feature: AVX512EVEX
+// Asm: VPORD, CPU Feature: AVX512F
 func (x Int32x16) Or(y Int32x16) Int32x16
 
 // Or performs a bitwise OR operation between two vectors.
@@ -5044,7 +5044,7 @@ func (x Int64x4) Or(y Int64x4) Int64x4
 
-// Or performs a masked bitwise OR operation between two vectors.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPORQ, CPU Feature: AVX512EVEX
+// Asm: VPORQ, CPU Feature: AVX512F
 func (x Int64x8) Or(y Int64x8) Int64x8
 
 // Or performs a bitwise OR operation between two vectors.
@@ -5079,7 +5079,7 @@ func (x Uint32x8) Or(y Uint32x8) Uint32x8
 
-// Or performs a masked bitwise OR operation between two vectors.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPORD, CPU Feature: AVX512EVEX
+// Asm: VPORD, CPU Feature: AVX512F
 func (x Uint32x16) Or(y Uint32x16) Uint32x16
 
 // Or performs a bitwise OR operation between two vectors.
@@ -5094,69 +5094,69 @@ func (x Uint64x4) Or(y Uint64x4) Uint64x4
 
-// Or performs a masked bitwise OR operation between two vectors.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPORQ, CPU Feature: AVX512EVEX
+// Asm: VPORQ, CPU Feature: AVX512F
 func (x Uint64x8) Or(y Uint64x8) Uint64x8
 
 /* OrMasked */
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORD, CPU Feature: AVX512EVEX
+// Asm: VPORD, CPU Feature: AVX512F
 func (x Int32x4) OrMasked(y Int32x4, z Mask32x4) Int32x4
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORD, CPU Feature: AVX512EVEX
+// Asm: VPORD, CPU Feature: AVX512F
 func (x Int32x8) OrMasked(y Int32x8, z Mask32x8) Int32x8
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORD, CPU Feature: AVX512EVEX
+// Asm: VPORD, CPU Feature: AVX512F
 func (x Int32x16) OrMasked(y Int32x16, z Mask32x16) Int32x16
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORQ, CPU Feature: AVX512EVEX
+// Asm: VPORQ, CPU Feature: AVX512F
 func (x Int64x2) OrMasked(y Int64x2, z Mask64x2) Int64x2
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORQ, CPU Feature: AVX512EVEX
+// Asm: VPORQ, CPU Feature: AVX512F
 func (x Int64x4) OrMasked(y Int64x4, z Mask64x4) Int64x4
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORQ, CPU Feature: AVX512EVEX
+// Asm: VPORQ, CPU Feature: AVX512F
 func (x Int64x8) OrMasked(y Int64x8, z Mask64x8) Int64x8
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORD, CPU Feature: AVX512EVEX
+// Asm: VPORD, CPU Feature: AVX512F
 func (x Uint32x4) OrMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORD, CPU Feature: AVX512EVEX
+// Asm: VPORD, CPU Feature: AVX512F
 func (x Uint32x8) OrMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORD, CPU Feature: AVX512EVEX
+// Asm: VPORD, CPU Feature: AVX512F
 func (x Uint32x16) OrMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORQ, CPU Feature: AVX512EVEX
+// Asm: VPORQ, CPU Feature: AVX512F
 func (x Uint64x2) OrMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORQ, CPU Feature: AVX512EVEX
+// Asm: VPORQ, CPU Feature: AVX512F
 func (x Uint64x4) OrMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // OrMasked performs a masked bitwise OR operation between two vectors.
 //
-// Asm: VPORQ, CPU Feature: AVX512EVEX
+// Asm: VPORQ, CPU Feature: AVX512F
 func (x Uint64x8) OrMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* PairDotProd */
@@ -5176,41 +5176,41 @@ func (x Int16x16) PairDotProd(y Int16x16) Int32x8
 // PairDotProd multiplies the elements and add the pairs together,
 // yielding a vector of half as many elements with twice the input element size.
 //
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+// Asm: VPMADDWD, CPU Feature: AVX512BW
 func (x Int16x32) PairDotProd(y Int16x32) Int32x16
 
 /* PairDotProdAccumulate */
 
 // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
+// Asm: VPDPWSSD, CPU Feature: AVXVNNI
 func (x Int32x4) PairDotProdAccumulate(y Int16x8, z Int16x8) Int32x4
 
 // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
+// Asm: VPDPWSSD, CPU Feature: AVXVNNI
 func (x Int32x8) PairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8
 
 // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
 func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16
 
 /* PairDotProdAccumulateMasked */
 
 // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
 func (x Int32x4) PairDotProdAccumulateMasked(y Int16x8, z Int16x8, u Mask32x4) Int32x4
 
 // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
 func (x Int32x8) PairDotProdAccumulateMasked(y Int16x16, z Int16x16, u Mask32x8) Int32x8
 
 // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
 func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x16) Int32x16
 
 /* PairDotProdMasked */
@@ -5218,19 +5218,19 @@ func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x1
 // PairDotProdMasked multiplies the elements and add the pairs together,
 // yielding a vector of half as many elements with twice the input element size.
 //
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+// Asm: VPMADDWD, CPU Feature: AVX512BW
 func (x Int16x8) PairDotProdMasked(y Int16x8, z Mask16x8) Int32x4
 
 // PairDotProdMasked multiplies the elements and add the pairs together,
 // yielding a vector of half as many elements with twice the input element size.
 //
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+// Asm: VPMADDWD, CPU Feature: AVX512BW
 func (x Int16x16) PairDotProdMasked(y Int16x16, z Mask16x16) Int32x8
 
 // PairDotProdMasked multiplies the elements and add the pairs together,
 // yielding a vector of half as many elements with twice the input element size.
 //
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+// Asm: VPMADDWD, CPU Feature: AVX512BW
 func (x Int16x32) PairDotProdMasked(y Int16x32, z Mask16x32) Int32x16
 
 /* PairwiseAdd */
@@ -5385,244 +5385,244 @@ func (x Uint32x8) PairwiseSub(y Uint32x8) Uint32x8
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Int8x16) PopCount() Int8x16
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Int8x32) PopCount() Int8x32
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Int8x64) PopCount() Int8x64
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Int16x8) PopCount() Int16x8
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Int16x16) PopCount() Int16x16
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Int16x32) PopCount() Int16x32
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Int32x4) PopCount() Int32x4
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Int32x8) PopCount() Int32x8
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Int32x16) PopCount() Int32x16
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Int64x2) PopCount() Int64x2
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Int64x4) PopCount() Int64x4
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Int64x8) PopCount() Int64x8
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Uint8x16) PopCount() Uint8x16
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Uint8x32) PopCount() Uint8x32
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Uint8x64) PopCount() Uint8x64
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Uint16x8) PopCount() Uint16x8
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Uint16x16) PopCount() Uint16x16
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Uint16x32) PopCount() Uint16x32
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint32x4) PopCount() Uint32x4
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint32x8) PopCount() Uint32x8
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint32x16) PopCount() Uint32x16
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint64x2) PopCount() Uint64x2
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint64x4) PopCount() Uint64x4
 
 // PopCount counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint64x8) PopCount() Uint64x8
 
 /* PopCountMasked */
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Int8x16) PopCountMasked(y Mask8x16) Int8x16
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Int8x32) PopCountMasked(y Mask8x32) Int8x32
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Int8x64) PopCountMasked(y Mask8x64) Int8x64
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Int16x8) PopCountMasked(y Mask16x8) Int16x8
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Int16x16) PopCountMasked(y Mask16x16) Int16x16
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Int16x32) PopCountMasked(y Mask16x32) Int16x32
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Int32x4) PopCountMasked(y Mask32x4) Int32x4
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Int32x8) PopCountMasked(y Mask32x8) Int32x8
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Int32x16) PopCountMasked(y Mask32x16) Int32x16
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Int64x2) PopCountMasked(y Mask64x2) Int64x2
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Int64x4) PopCountMasked(y Mask64x4) Int64x4
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Int64x8) PopCountMasked(y Mask64x8) Int64x8
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Uint8x16) PopCountMasked(y Mask8x16) Uint8x16
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Uint8x32) PopCountMasked(y Mask8x32) Uint8x32
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
 func (x Uint8x64) PopCountMasked(y Mask8x64) Uint8x64
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Uint16x8) PopCountMasked(y Mask16x8) Uint16x8
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Uint16x16) PopCountMasked(y Mask16x16) Uint16x16
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
 func (x Uint16x32) PopCountMasked(y Mask16x32) Uint16x32
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint32x4) PopCountMasked(y Mask32x4) Uint32x4
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint32x8) PopCountMasked(y Mask32x8) Uint32x8
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint32x16) PopCountMasked(y Mask32x16) Uint32x16
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint64x2) PopCountMasked(y Mask64x2) Uint64x2
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint64x4) PopCountMasked(y Mask64x4) Uint64x4
 
 // PopCountMasked counts the number of set bits in each element.
 //
-// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
 func (x Uint64x8) PopCountMasked(y Mask64x8) Uint64x8
 
 /* RotateAllLeft */
@@ -5631,84 +5631,84 @@ func (x Uint64x8) PopCountMasked(y Mask64x8) Uint64x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Int32x4) RotateAllLeft(shift uint8) Int32x4
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Int32x8) RotateAllLeft(shift uint8) Int32x8
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Int32x16) RotateAllLeft(shift uint8) Int32x16
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Int64x2) RotateAllLeft(shift uint8) Int64x2
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Int64x4) RotateAllLeft(shift uint8) Int64x4
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Int64x8) RotateAllLeft(shift uint8) Int64x8
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Uint32x4) RotateAllLeft(shift uint8) Uint32x4
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Uint32x8) RotateAllLeft(shift uint8) Uint32x8
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Uint32x16) RotateAllLeft(shift uint8) Uint32x16
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Uint64x2) RotateAllLeft(shift uint8) Uint64x2
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8
 
 /* RotateAllLeftMasked */
@@ -5717,84 +5717,84 @@ func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Int32x4) RotateAllLeftMasked(shift uint8, y Mask32x4) Int32x4
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Int32x8) RotateAllLeftMasked(shift uint8, y Mask32x8) Int32x8
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Int32x16) RotateAllLeftMasked(shift uint8, y Mask32x16) Int32x16
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Int64x2) RotateAllLeftMasked(shift uint8, y Mask64x2) Int64x2
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Int64x4) RotateAllLeftMasked(shift uint8, y Mask64x4) Int64x4
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Int64x8) RotateAllLeftMasked(shift uint8, y Mask64x8) Int64x8
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Uint32x4) RotateAllLeftMasked(shift uint8, y Mask32x4) Uint32x4
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Uint32x8) RotateAllLeftMasked(shift uint8, y Mask32x8) Uint32x8
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLD, CPU Feature: AVX512EVEX
+// Asm: VPROLD, CPU Feature: AVX512F
 func (x Uint32x16) RotateAllLeftMasked(shift uint8, y Mask32x16) Uint32x16
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Uint64x2) RotateAllLeftMasked(shift uint8, y Mask64x2) Uint64x2
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Uint64x4) RotateAllLeftMasked(shift uint8, y Mask64x4) Uint64x4
 
 // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPROLQ, CPU Feature: AVX512EVEX
+// Asm: VPROLQ, CPU Feature: AVX512F
 func (x Uint64x8) RotateAllLeftMasked(shift uint8, y Mask64x8) Uint64x8
 
 /* RotateAllRight */
@@ -5803,84 +5803,84 @@ func (x Uint64x8) RotateAllLeftMasked(shift uint8, y Mask64x8) Uint64x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Int32x4) RotateAllRight(shift uint8) Int32x4
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Int32x8) RotateAllRight(shift uint8) Int32x8
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Int32x16) RotateAllRight(shift uint8) Int32x16
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Int64x2) RotateAllRight(shift uint8) Int64x2
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Int64x4) RotateAllRight(shift uint8) Int64x4
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Int64x8) RotateAllRight(shift uint8) Int64x8
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Uint32x4) RotateAllRight(shift uint8) Uint32x4
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Uint32x8) RotateAllRight(shift uint8) Uint32x8
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8
 
 /* RotateAllRightMasked */
@@ -5889,332 +5889,332 @@ func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Int32x4) RotateAllRightMasked(shift uint8, y Mask32x4) Int32x4
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Int32x8) RotateAllRightMasked(shift uint8, y Mask32x8) Int32x8
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Int32x16) RotateAllRightMasked(shift uint8, y Mask32x16) Int32x16
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Int64x2) RotateAllRightMasked(shift uint8, y Mask64x2) Int64x2
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Int64x4) RotateAllRightMasked(shift uint8, y Mask64x4) Int64x4
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Int64x8) RotateAllRightMasked(shift uint8, y Mask64x8) Int64x8
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Uint32x4) RotateAllRightMasked(shift uint8, y Mask32x4) Uint32x4
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Uint32x8) RotateAllRightMasked(shift uint8, y Mask32x8) Uint32x8
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORD, CPU Feature: AVX512EVEX
+// Asm: VPRORD, CPU Feature: AVX512F
 func (x Uint32x16) RotateAllRightMasked(shift uint8, y Mask32x16) Uint32x16
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Uint64x2) RotateAllRightMasked(shift uint8, y Mask64x2) Uint64x2
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Uint64x4) RotateAllRightMasked(shift uint8, y Mask64x4) Uint64x4
 
 // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPRORQ, CPU Feature: AVX512EVEX
+// Asm: VPRORQ, CPU Feature: AVX512F
 func (x Uint64x8) RotateAllRightMasked(shift uint8, y Mask64x8) Uint64x8
 
 /* RotateLeft */
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Int32x4) RotateLeft(y Int32x4) Int32x4
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Int32x8) RotateLeft(y Int32x8) Int32x8
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Int32x16) RotateLeft(y Int32x16) Int32x16
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Int64x2) RotateLeft(y Int64x2) Int64x2
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Int64x4) RotateLeft(y Int64x4) Int64x4
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Int64x8) RotateLeft(y Int64x8) Int64x8
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Uint32x4) RotateLeft(y Uint32x4) Uint32x4
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Uint32x8) RotateLeft(y Uint32x8) Uint32x8
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Uint32x16) RotateLeft(y Uint32x16) Uint32x16
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Uint64x2) RotateLeft(y Uint64x2) Uint64x2
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4
 
 // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8
 
 /* RotateLeftMasked */
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Int32x4) RotateLeftMasked(y Int32x4, z Mask32x4) Int32x4
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Int32x8) RotateLeftMasked(y Int32x8, z Mask32x8) Int32x8
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Int32x16) RotateLeftMasked(y Int32x16, z Mask32x16) Int32x16
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Int64x2) RotateLeftMasked(y Int64x2, z Mask64x2) Int64x2
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Int64x4) RotateLeftMasked(y Int64x4, z Mask64x4) Int64x4
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Int64x8) RotateLeftMasked(y Int64x8, z Mask64x8) Int64x8
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Uint32x4) RotateLeftMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Uint32x8) RotateLeftMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVD, CPU Feature: AVX512EVEX
+// Asm: VPROLVD, CPU Feature: AVX512F
 func (x Uint32x16) RotateLeftMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Uint64x2) RotateLeftMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Uint64x4) RotateLeftMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPROLVQ, CPU Feature: AVX512EVEX
+// Asm: VPROLVQ, CPU Feature: AVX512F
 func (x Uint64x8) RotateLeftMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* RotateRight */
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Int32x4) RotateRight(y Int32x4) Int32x4
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Int32x8) RotateRight(y Int32x8) Int32x8
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Int32x16) RotateRight(y Int32x16) Int32x16
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Int64x2) RotateRight(y Int64x2) Int64x2
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Int64x4) RotateRight(y Int64x4) Int64x4
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Int64x8) RotateRight(y Int64x8) Int64x8
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Uint32x4) RotateRight(y Uint32x4) Uint32x4
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Uint32x8) RotateRight(y Uint32x8) Uint32x8
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Uint32x16) RotateRight(y Uint32x16) Uint32x16
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Uint64x2) RotateRight(y Uint64x2) Uint64x2
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8
 
 /* RotateRightMasked */
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Int32x4) RotateRightMasked(y Int32x4, z Mask32x4) Int32x4
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Int32x8) RotateRightMasked(y Int32x8, z Mask32x8) Int32x8
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Int32x16) RotateRightMasked(y Int32x16, z Mask32x16) Int32x16
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Int64x2) RotateRightMasked(y Int64x2, z Mask64x2) Int64x2
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Int64x4) RotateRightMasked(y Int64x4, z Mask64x4) Int64x4
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Int64x8) RotateRightMasked(y Int64x8, z Mask64x8) Int64x8
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Uint32x4) RotateRightMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Uint32x8) RotateRightMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVD, CPU Feature: AVX512EVEX
+// Asm: VPRORVD, CPU Feature: AVX512F
 func (x Uint32x16) RotateRightMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Uint64x2) RotateRightMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Uint64x4) RotateRightMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 //
-// Asm: VPRORVQ, CPU Feature: AVX512EVEX
+// Asm: VPRORVQ, CPU Feature: AVX512F
 func (x Uint64x8) RotateRightMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* Round */
@@ -6245,42 +6245,42 @@ func (x Float64x4) Round() Float64x4
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x4) RoundWithPrecision(prec uint8) Float32x4
 
 // RoundWithPrecision rounds elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x8) RoundWithPrecision(prec uint8) Float32x8
 
 // RoundWithPrecision rounds elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x16) RoundWithPrecision(prec uint8) Float32x16
 
 // RoundWithPrecision rounds elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x2) RoundWithPrecision(prec uint8) Float64x2
 
 // RoundWithPrecision rounds elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x4) RoundWithPrecision(prec uint8) Float64x4
 
 // RoundWithPrecision rounds elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x8) RoundWithPrecision(prec uint8) Float64x8
 
 /* RoundWithPrecisionMasked */
@@ -6289,42 +6289,42 @@ func (x Float64x8) RoundWithPrecision(prec uint8) Float64x8
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x4) RoundWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
 
 // RoundWithPrecisionMasked rounds elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x8) RoundWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
 
 // RoundWithPrecisionMasked rounds elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x16) RoundWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
 
 // RoundWithPrecisionMasked rounds elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x2) RoundWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
 
 // RoundWithPrecisionMasked rounds elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x4) RoundWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
 
 // RoundWithPrecisionMasked rounds elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x8) RoundWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
 
 /* SaturatedAdd */
@@ -6341,7 +6341,7 @@ func (x Int8x32) SaturatedAdd(y Int8x32) Int8x32
 
 // SaturatedAdd adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
+// Asm: VPADDSB, CPU Feature: AVX512BW
 func (x Int8x64) SaturatedAdd(y Int8x64) Int8x64
 
 // SaturatedAdd adds corresponding elements of two vectors with saturation.
@@ -6356,7 +6356,7 @@ func (x Int16x16) SaturatedAdd(y Int16x16) Int16x16
 
 // SaturatedAdd adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
+// Asm: VPADDSW, CPU Feature: AVX512BW
 func (x Int16x32) SaturatedAdd(y Int16x32) Int16x32
 
 // SaturatedAdd adds corresponding elements of two vectors with saturation.
@@ -6371,7 +6371,7 @@ func (x Uint8x32) SaturatedAdd(y Uint8x32) Uint8x32
 
 // SaturatedAdd adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
+// Asm: VPADDSB, CPU Feature: AVX512BW
 func (x Uint8x64) SaturatedAdd(y Uint8x64) Uint8x64
 
 // SaturatedAdd adds corresponding elements of two vectors with saturation.
@@ -6386,103 +6386,103 @@ func (x Uint16x16) SaturatedAdd(y Uint16x16) Uint16x16
 
 // SaturatedAdd adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
+// Asm: VPADDSW, CPU Feature: AVX512BW
 func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32
 
 /* SaturatedAddMasked */
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
+// Asm: VPADDSB, CPU Feature: AVX512BW
 func (x Int8x16) SaturatedAddMasked(y Int8x16, z Mask8x16) Int8x16
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
+// Asm: VPADDSB, CPU Feature: AVX512BW
 func (x Int8x32) SaturatedAddMasked(y Int8x32, z Mask8x32) Int8x32
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
+// Asm: VPADDSB, CPU Feature: AVX512BW
 func (x Int8x64) SaturatedAddMasked(y Int8x64, z Mask8x64) Int8x64
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
+// Asm: VPADDSW, CPU Feature: AVX512BW
 func (x Int16x8) SaturatedAddMasked(y Int16x8, z Mask16x8) Int16x8
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
+// Asm: VPADDSW, CPU Feature: AVX512BW
 func (x Int16x16) SaturatedAddMasked(y Int16x16, z Mask16x16) Int16x16
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
+// Asm: VPADDSW, CPU Feature: AVX512BW
 func (x Int16x32) SaturatedAddMasked(y Int16x32, z Mask16x32) Int16x32
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
+// Asm: VPADDSB, CPU Feature: AVX512BW
 func (x Uint8x16) SaturatedAddMasked(y Uint8x16, z Mask8x16) Uint8x16
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
+// Asm: VPADDSB, CPU Feature: AVX512BW
 func (x Uint8x32) SaturatedAddMasked(y Uint8x32, z Mask8x32) Uint8x32
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSB, CPU Feature: AVX512EVEX
+// Asm: VPADDSB, CPU Feature: AVX512BW
 func (x Uint8x64) SaturatedAddMasked(y Uint8x64, z Mask8x64) Uint8x64
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
+// Asm: VPADDSW, CPU Feature: AVX512BW
 func (x Uint16x8) SaturatedAddMasked(y Uint16x8, z Mask16x8) Uint16x8
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
+// Asm: VPADDSW, CPU Feature: AVX512BW
 func (x Uint16x16) SaturatedAddMasked(y Uint16x16, z Mask16x16) Uint16x16
 
 // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
+// Asm: VPADDSW, CPU Feature: AVX512BW
 func (x Uint16x32) SaturatedAddMasked(y Uint16x32, z Mask16x32) Uint16x32
 
 /* SaturatedPairDotProdAccumulate */
 
 // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
+// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
 func (x Int32x4) SaturatedPairDotProdAccumulate(y Int16x8, z Int16x8) Int32x4
 
 // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
+// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
 func (x Int32x8) SaturatedPairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8
 
 // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
 func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16
 
 /* SaturatedPairDotProdAccumulateMasked */
 
 // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
 func (x Int32x4) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int16x8, u Mask32x4) Int32x4
 
 // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
 func (x Int32x8) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int16x16, u Mask32x8) Int32x8
 
 // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
 func (x Int32x16) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x16) Int32x16
 
 /* SaturatedPairwiseAdd */
@@ -6527,7 +6527,7 @@ func (x Int8x32) SaturatedSub(y Int8x32) Int8x32
 
 // SaturatedSub subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+// Asm: VPSUBSB, CPU Feature: AVX512BW
 func (x Int8x64) SaturatedSub(y Int8x64) Int8x64
 
 // SaturatedSub subtracts corresponding elements of two vectors with saturation.
@@ -6542,7 +6542,7 @@ func (x Int16x16) SaturatedSub(y Int16x16) Int16x16
 
 // SaturatedSub subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+// Asm: VPSUBSW, CPU Feature: AVX512BW
 func (x Int16x32) SaturatedSub(y Int16x32) Int16x32
 
 // SaturatedSub subtracts corresponding elements of two vectors with saturation.
@@ -6557,7 +6557,7 @@ func (x Uint8x32) SaturatedSub(y Uint8x32) Uint8x32
 
 // SaturatedSub subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+// Asm: VPSUBSB, CPU Feature: AVX512BW
 func (x Uint8x64) SaturatedSub(y Uint8x64) Uint8x64
 
 // SaturatedSub subtracts corresponding elements of two vectors with saturation.
@@ -6572,69 +6572,69 @@ func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16
 
 // SaturatedSub subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+// Asm: VPSUBSW, CPU Feature: AVX512BW
 func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32
 
 /* SaturatedSubMasked */
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+// Asm: VPSUBSB, CPU Feature: AVX512BW
 func (x Int8x16) SaturatedSubMasked(y Int8x16, z Mask8x16) Int8x16
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+// Asm: VPSUBSB, CPU Feature: AVX512BW
 func (x Int8x32) SaturatedSubMasked(y Int8x32, z Mask8x32) Int8x32
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+// Asm: VPSUBSB, CPU Feature: AVX512BW
 func (x Int8x64) SaturatedSubMasked(y Int8x64, z Mask8x64) Int8x64
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+// Asm: VPSUBSW, CPU Feature: AVX512BW
 func (x Int16x8) SaturatedSubMasked(y Int16x8, z Mask16x8) Int16x8
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+// Asm: VPSUBSW, CPU Feature: AVX512BW
 func (x Int16x16) SaturatedSubMasked(y Int16x16, z Mask16x16) Int16x16
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+// Asm: VPSUBSW, CPU Feature: AVX512BW
 func (x Int16x32) SaturatedSubMasked(y Int16x32, z Mask16x32) Int16x32
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+// Asm: VPSUBSB, CPU Feature: AVX512BW
 func (x Uint8x16) SaturatedSubMasked(y Uint8x16, z Mask8x16) Uint8x16
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+// Asm: VPSUBSB, CPU Feature: AVX512BW
 func (x Uint8x32) SaturatedSubMasked(y Uint8x32, z Mask8x32) Uint8x32
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+// Asm: VPSUBSB, CPU Feature: AVX512BW
 func (x Uint8x64) SaturatedSubMasked(y Uint8x64, z Mask8x64) Uint8x64
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+// Asm: VPSUBSW, CPU Feature: AVX512BW
 func (x Uint16x8) SaturatedSubMasked(y Uint16x8, z Mask16x8) Uint16x8
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+// Asm: VPSUBSW, CPU Feature: AVX512BW
 func (x Uint16x16) SaturatedSubMasked(y Uint16x16, z Mask16x16) Uint16x16
 
 // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
 //
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+// Asm: VPSUBSW, CPU Feature: AVX512BW
 func (x Uint16x32) SaturatedSubMasked(y Uint16x32, z Mask16x32) Uint16x32
 
 /* SaturatedUnsignedSignedPairDotProd */
@@ -6654,7 +6654,7 @@ func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16
 // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation,
 // yielding a vector of half as many elements with twice the input element size.
 //
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+// Asm: VPMADDUBSW, CPU Feature: AVX512BW
 func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32
 
 /* SaturatedUnsignedSignedPairDotProdMasked */
@@ -6662,83 +6662,83 @@ func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32
 // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation,
 // yielding a vector of half as many elements with twice the input element size.
 //
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+// Asm: VPMADDUBSW, CPU Feature: AVX512BW
 func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, z Mask16x8) Int16x8
 
 // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation,
 // yielding a vector of half as many elements with twice the input element size.
 //
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+// Asm: VPMADDUBSW, CPU Feature: AVX512BW
 func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, z Mask16x16) Int16x16
 
 // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation,
 // yielding a vector of half as many elements with twice the input element size.
 //
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+// Asm: VPMADDUBSW, CPU Feature: AVX512BW
 func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, z Mask16x32) Int16x32
 
 /* SaturatedUnsignedSignedQuadDotProdAccumulate */
 
 // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
 func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Int32x4
 
 // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
 func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Int32x8
 
 // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
 func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Int32x16
 
 // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
 func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Uint32x4
 
 // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
 func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uint32x8
 
 // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
 func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
 
 /* SaturatedUnsignedSignedQuadDotProdAccumulateMasked */
 
 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
 func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
 
 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
 func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
 
 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
 func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
 
 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
 func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
 
 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
 func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
 
 // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
 func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
 
 /* Set128 */
@@ -6885,7 +6885,7 @@ func (x Int16x16) ShiftAllLeft(y uint64) Int16x16
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLW, CPU Feature: AVX512EVEX
+// Asm: VPSLLW, CPU Feature: AVX512BW
 func (x Int16x32) ShiftAllLeft(y uint64) Int16x32
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
@@ -6900,7 +6900,7 @@ func (x Int32x8) ShiftAllLeft(y uint64) Int32x8
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLD, CPU Feature: AVX512EVEX
+// Asm: VPSLLD, CPU Feature: AVX512F
 func (x Int32x16) ShiftAllLeft(y uint64) Int32x16
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
@@ -6915,7 +6915,7 @@ func (x Int64x4) ShiftAllLeft(y uint64) Int64x4
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLQ, CPU Feature: AVX512F
 func (x Int64x8) ShiftAllLeft(y uint64) Int64x8
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
@@ -6930,7 +6930,7 @@ func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLW, CPU Feature: AVX512EVEX
+// Asm: VPSLLW, CPU Feature: AVX512BW
 func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
@@ -6945,7 +6945,7 @@ func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLD, CPU Feature: AVX512EVEX
+// Asm: VPSLLD, CPU Feature: AVX512F
 func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
@@ -6960,7 +6960,7 @@ func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4
 
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLQ, CPU Feature: AVX512F
 func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8
 
 /* ShiftAllLeftAndFillUpperFrom */
@@ -6970,7 +6970,7 @@ func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Int16x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int16x8) Int16x8
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -6978,7 +6978,7 @@ func (x Int16x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int16x8) Int16x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Int16x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int16x16) Int16x16
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -6986,7 +6986,7 @@ func (x Int16x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int16x16) Int16x16
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Int16x32) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int16x32) Int16x32
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -6994,7 +6994,7 @@ func (x Int16x32) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int16x32) Int16x32
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Int32x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int32x4) Int32x4
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7002,7 +7002,7 @@ func (x Int32x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int32x4) Int32x4
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Int32x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int32x8) Int32x8
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7010,7 +7010,7 @@ func (x Int32x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int32x8) Int32x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Int32x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int32x16) Int32x16
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7018,7 +7018,7 @@ func (x Int32x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int32x16) Int32x16
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Int64x2) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int64x2) Int64x2
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7026,7 +7026,7 @@ func (x Int64x2) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int64x2) Int64x2
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Int64x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int64x4) Int64x4
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7034,7 +7034,7 @@ func (x Int64x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int64x4) Int64x4
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Int64x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int64x8) Int64x8
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7042,7 +7042,7 @@ func (x Int64x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int64x8) Int64x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Uint16x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint16x8) Uint16x8
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7050,7 +7050,7 @@ func (x Uint16x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint16x8) Uint16x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Uint16x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint16x16) Uint16x16
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7058,7 +7058,7 @@ func (x Uint16x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint16x16) Uint16
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Uint16x32) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint16x32) Uint16x32
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7066,7 +7066,7 @@ func (x Uint16x32) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint16x32) Uint16
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Uint32x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint32x4) Uint32x4
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7074,7 +7074,7 @@ func (x Uint32x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint32x4) Uint32x4
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Uint32x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint32x8) Uint32x8
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7082,7 +7082,7 @@ func (x Uint32x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint32x8) Uint32x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Uint32x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint32x16) Uint32x16
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7090,7 +7090,7 @@ func (x Uint32x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint32x16) Uint32
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x2) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x2) Uint64x2
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7098,7 +7098,7 @@ func (x Uint64x2) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x2) Uint64x2
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x4) Uint64x4
 
 // ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
@@ -7106,7 +7106,7 @@ func (x Uint64x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x4) Uint64x4
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x8
 
 /* ShiftAllLeftAndFillUpperFromMasked */
@@ -7116,7 +7116,7 @@ func (x Uint64x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, z Mask16x8) Int16x8
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7124,7 +7124,7 @@ func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, z Ma
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, z Mask16x16) Int16x16
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7132,7 +7132,7 @@ func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, z Mask16x32) Int16x32
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7140,7 +7140,7 @@ func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, z Mask32x4) Int32x4
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7148,7 +7148,7 @@ func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, z Ma
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, z Mask32x8) Int32x8
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7156,7 +7156,7 @@ func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, z Ma
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, z Mask32x16) Int32x16
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7164,7 +7164,7 @@ func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, z Mask64x2) Int64x2
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7172,7 +7172,7 @@ func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, z Ma
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, z Mask64x4) Int64x4
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7180,7 +7180,7 @@ func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, z Ma
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, z Mask64x8) Int64x8
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7188,7 +7188,7 @@ func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, z Ma
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, z Mask16x8) Uint16x8
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7196,7 +7196,7 @@ func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, z Mask16x16) Uint16x16
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7204,7 +7204,7 @@ func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16,
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
 func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, z Mask16x32) Uint16x32
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7212,7 +7212,7 @@ func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32,
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, z Mask32x4) Uint32x4
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7220,7 +7220,7 @@ func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, z Mask32x8) Uint32x8
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7228,7 +7228,7 @@ func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
 func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, z Mask32x16) Uint32x16
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7236,7 +7236,7 @@ func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16,
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, z Mask64x2) Uint64x2
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7244,7 +7244,7 @@ func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, z Mask64x4) Uint64x4
 
 // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
@@ -7252,99 +7252,99 @@ func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHLDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, z Mask64x8) Uint64x8
 
 /* ShiftAllLeftMasked */
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLW, CPU Feature: AVX512EVEX
+// Asm: VPSLLW, CPU Feature: AVX512BW
 func (x Int16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Int16x8
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLW, CPU Feature: AVX512EVEX
+// Asm: VPSLLW, CPU Feature: AVX512BW
 func (x Int16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Int16x16
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLW, CPU Feature: AVX512EVEX
+// Asm: VPSLLW, CPU Feature: AVX512BW
 func (x Int16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Int16x32
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLD, CPU Feature: AVX512EVEX
+// Asm: VPSLLD, CPU Feature: AVX512F
 func (x Int32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Int32x4
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLD, CPU Feature: AVX512EVEX
+// Asm: VPSLLD, CPU Feature: AVX512F
 func (x Int32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Int32x8
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLD, CPU Feature: AVX512EVEX
+// Asm: VPSLLD, CPU Feature: AVX512F
 func (x Int32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Int32x16
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLQ, CPU Feature: AVX512F
 func (x Int64x2) ShiftAllLeftMasked(y uint64, z Mask64x2) Int64x2
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLQ, CPU Feature: AVX512F
 func (x Int64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Int64x4
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLQ, CPU Feature: AVX512F
 func (x Int64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Int64x8
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLW, CPU Feature: AVX512EVEX
+// Asm: VPSLLW, CPU Feature: AVX512BW
 func (x Uint16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Uint16x8
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLW, CPU Feature: AVX512EVEX
+// Asm: VPSLLW, CPU Feature: AVX512BW
 func (x Uint16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Uint16x16
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLW, CPU Feature: AVX512EVEX
+// Asm: VPSLLW, CPU Feature: AVX512BW
 func (x Uint16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Uint16x32
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLD, CPU Feature: AVX512EVEX
+// Asm: VPSLLD, CPU Feature: AVX512F
 func (x Uint32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Uint32x4
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLD, CPU Feature: AVX512EVEX
+// Asm: VPSLLD, CPU Feature: AVX512F
 func (x Uint32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Uint32x8
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLD, CPU Feature: AVX512EVEX
+// Asm: VPSLLD, CPU Feature: AVX512F
 func (x Uint32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Uint32x16
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLQ, CPU Feature: AVX512F
 func (x Uint64x2) ShiftAllLeftMasked(y uint64, z Mask64x2) Uint64x2
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLQ, CPU Feature: AVX512F
 func (x Uint64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Uint64x4
 
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLQ, CPU Feature: AVX512F
 func (x Uint64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Uint64x8
 
 /* ShiftAllRight */
@@ -7361,7 +7361,7 @@ func (x Int16x16) ShiftAllRight(y uint64) Int16x16
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAW, CPU Feature: AVX512EVEX
+// Asm: VPSRAW, CPU Feature: AVX512BW
 func (x Int16x32) ShiftAllRight(y uint64) Int16x32
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
@@ -7376,22 +7376,22 @@ func (x Int32x8) ShiftAllRight(y uint64) Int32x8
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAD, CPU Feature: AVX512EVEX
+// Asm: VPSRAD, CPU Feature: AVX512F
 func (x Int32x16) ShiftAllRight(y uint64) Int32x16
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAQ, CPU Feature: AVX512F
 func (x Int64x2) ShiftAllRight(y uint64) Int64x2
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAQ, CPU Feature: AVX512F
 func (x Int64x4) ShiftAllRight(y uint64) Int64x4
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAQ, CPU Feature: AVX512F
 func (x Int64x8) ShiftAllRight(y uint64) Int64x8
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
@@ -7406,7 +7406,7 @@ func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLW, CPU Feature: AVX512EVEX
+// Asm: VPSRLW, CPU Feature: AVX512BW
 func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
@@ -7421,7 +7421,7 @@ func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLD, CPU Feature: AVX512EVEX
+// Asm: VPSRLD, CPU Feature: AVX512F
 func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
@@ -7436,7 +7436,7 @@ func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4
 
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512EVEX
+// Asm: VPSRLQ, CPU Feature: AVX512F
 func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8
 
 /* ShiftAllRightAndFillUpperFrom */
@@ -7446,7 +7446,7 @@ func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Int16x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Int16x8) Int16x8
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7454,7 +7454,7 @@ func (x Int16x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Int16x8) Int16x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Int16x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Int16x16) Int16x16
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7462,7 +7462,7 @@ func (x Int16x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Int16x16) Int16x1
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Int16x32) ShiftAllRightAndFillUpperFrom(shift uint8, y Int16x32) Int16x32
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7470,7 +7470,7 @@ func (x Int16x32) ShiftAllRightAndFillUpperFrom(shift uint8, y Int16x32) Int16x3
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Int32x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Int32x4) Int32x4
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7478,7 +7478,7 @@ func (x Int32x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Int32x4) Int32x4
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Int32x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Int32x8) Int32x8
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7486,7 +7486,7 @@ func (x Int32x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Int32x8) Int32x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Int32x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Int32x16) Int32x16
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7494,7 +7494,7 @@ func (x Int32x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Int32x16) Int32x1
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Int64x2) ShiftAllRightAndFillUpperFrom(shift uint8, y Int64x2) Int64x2
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7502,7 +7502,7 @@ func (x Int64x2) ShiftAllRightAndFillUpperFrom(shift uint8, y Int64x2) Int64x2
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Int64x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Int64x4) Int64x4
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7510,7 +7510,7 @@ func (x Int64x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Int64x4) Int64x4
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Int64x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Int64x8) Int64x8
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7518,7 +7518,7 @@ func (x Int64x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Int64x8) Int64x8
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Uint16x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint16x8) Uint16x8
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7526,7 +7526,7 @@ func (x Uint16x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint16x8) Uint16x
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Uint16x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint16x16) Uint16x16
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7534,7 +7534,7 @@ func (x Uint16x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint16x16) Uint1
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Uint16x32) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint16x32) Uint16x32
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7542,7 +7542,7 @@ func (x Uint16x32) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint16x32) Uint1
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Uint32x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint32x4) Uint32x4
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7550,7 +7550,7 @@ func (x Uint32x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint32x4) Uint32x
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Uint32x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint32x8) Uint32x8
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7558,7 +7558,7 @@ func (x Uint32x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint32x8) Uint32x
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Uint32x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint32x16) Uint32x16
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7566,7 +7566,7 @@ func (x Uint32x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint32x16) Uint3
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x2) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x2) Uint64x2
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7574,7 +7574,7 @@ func (x Uint64x2) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x2) Uint64x
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x4) Uint64x4
 
 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
@@ -7582,7 +7582,7 @@ func (x Uint64x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x4) Uint64x
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x8
 
 /* ShiftAllRightAndFillUpperFromMasked */
@@ -7592,7 +7592,7 @@ func (x Uint64x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, z Mask16x8) Int16x8
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7600,7 +7600,7 @@ func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, z M
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, z Mask16x16) Int16x16
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7608,7 +7608,7 @@ func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, z Mask16x32) Int16x32
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7616,7 +7616,7 @@ func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, z Mask32x4) Int32x4
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7624,7 +7624,7 @@ func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, z M
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, z Mask32x8) Int32x8
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7632,7 +7632,7 @@ func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, z M
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, z Mask32x16) Int32x16
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7640,7 +7640,7 @@ func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, z Mask64x2) Int64x2
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7648,7 +7648,7 @@ func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, z M
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, z Mask64x4) Int64x4
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7656,7 +7656,7 @@ func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, z M
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, z Mask64x8) Int64x8
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7664,7 +7664,7 @@ func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, z M
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, z Mask16x8) Uint16x8
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7672,7 +7672,7 @@ func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, z Mask16x16) Uint16x16
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7680,7 +7680,7 @@ func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16,
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
 func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, z Mask16x32) Uint16x32
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7688,7 +7688,7 @@ func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32,
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, z Mask32x4) Uint32x4
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7696,7 +7696,7 @@ func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, z Mask32x8) Uint32x8
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7704,7 +7704,7 @@ func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
 func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, z Mask32x16) Uint32x16
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7712,7 +7712,7 @@ func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16,
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, z Mask64x2) Uint64x2
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7720,7 +7720,7 @@ func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, z Mask64x4) Uint64x4
 
 // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
@@ -7728,116 +7728,116 @@ func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, z
 //
 // shift is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VPSHRDQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
 func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, z Mask64x8) Uint64x8
 
 /* ShiftAllRightMasked */
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAW, CPU Feature: AVX512EVEX
+// Asm: VPSRAW, CPU Feature: AVX512BW
 func (x Int16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Int16x8
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAW, CPU Feature: AVX512EVEX
+// Asm: VPSRAW, CPU Feature: AVX512BW
 func (x Int16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Int16x16
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAW, CPU Feature: AVX512EVEX
+// Asm: VPSRAW, CPU Feature: AVX512BW
 func (x Int16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Int16x32
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAD, CPU Feature: AVX512EVEX
+// Asm: VPSRAD, CPU Feature: AVX512F
 func (x Int32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Int32x4
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAD, CPU Feature: AVX512EVEX
+// Asm: VPSRAD, CPU Feature: AVX512F
 func (x Int32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Int32x8
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAD, CPU Feature: AVX512EVEX
+// Asm: VPSRAD, CPU Feature: AVX512F
 func (x Int32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Int32x16
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAQ, CPU Feature: AVX512F
 func (x Int64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Int64x2
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAQ, CPU Feature: AVX512F
 func (x Int64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Int64x4
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAQ, CPU Feature: AVX512F
 func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLW, CPU Feature: AVX512EVEX
+// Asm: VPSRLW, CPU Feature: AVX512BW
 func (x Uint16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Uint16x8
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLW, CPU Feature: AVX512EVEX
+// Asm: VPSRLW, CPU Feature: AVX512BW
 func (x Uint16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Uint16x16
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLW, CPU Feature: AVX512EVEX
+// Asm: VPSRLW, CPU Feature: AVX512BW
 func (x Uint16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Uint16x32
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLD, CPU Feature: AVX512EVEX
+// Asm: VPSRLD, CPU Feature: AVX512F
 func (x Uint32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Uint32x4
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLD, CPU Feature: AVX512EVEX
+// Asm: VPSRLD, CPU Feature: AVX512F
 func (x Uint32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Uint32x8
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLD, CPU Feature: AVX512EVEX
+// Asm: VPSRLD, CPU Feature: AVX512F
 func (x Uint32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Uint32x16
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512EVEX
+// Asm: VPSRLQ, CPU Feature: AVX512F
 func (x Uint64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Uint64x2
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512EVEX
+// Asm: VPSRLQ, CPU Feature: AVX512F
 func (x Uint64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Uint64x4
 
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512EVEX
+// Asm: VPSRLQ, CPU Feature: AVX512F
 func (x Uint64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Uint64x8
 
 /* ShiftLeft */
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Int16x8) ShiftLeft(y Int16x8) Int16x8
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Int16x16) ShiftLeft(y Int16x16) Int16x16
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Int16x32) ShiftLeft(y Int16x32) Int16x32
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
@@ -7852,7 +7852,7 @@ func (x Int32x8) ShiftLeft(y Int32x8) Int32x8
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVD, CPU Feature: AVX512EVEX
+// Asm: VPSLLVD, CPU Feature: AVX512F
 func (x Int32x16) ShiftLeft(y Int32x16) Int32x16
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
@@ -7867,22 +7867,22 @@ func (x Int64x4) ShiftLeft(y Int64x4) Int64x4
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLVQ, CPU Feature: AVX512F
 func (x Int64x8) ShiftLeft(y Int64x8) Int64x8
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Uint16x8) ShiftLeft(y Uint16x8) Uint16x8
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Uint16x16) ShiftLeft(y Uint16x16) Uint16x16
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Uint16x32) ShiftLeft(y Uint16x32) Uint16x32
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
@@ -7897,7 +7897,7 @@ func (x Uint32x8) ShiftLeft(y Uint32x8) Uint32x8
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVD, CPU Feature: AVX512EVEX
+// Asm: VPSLLVD, CPU Feature: AVX512F
 func (x Uint32x16) ShiftLeft(y Uint32x16) Uint32x16
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
@@ -7912,7 +7912,7 @@ func (x Uint64x4) ShiftLeft(y Uint64x4) Uint64x4
 
 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLVQ, CPU Feature: AVX512F
 func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8
 
 /* ShiftLeftAndFillUpperFrom */
@@ -7920,109 +7920,109 @@ func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Int16x8) ShiftLeftAndFillUpperFrom(y Int16x8, z Int16x8) Int16x8
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Int16x16) ShiftLeftAndFillUpperFrom(y Int16x16, z Int16x16) Int16x16
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Int16x32) ShiftLeftAndFillUpperFrom(y Int16x32, z Int16x32) Int16x32
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Int32x4) ShiftLeftAndFillUpperFrom(y Int32x4, z Int32x4) Int32x4
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Int32x8) ShiftLeftAndFillUpperFrom(y Int32x8, z Int32x8) Int32x8
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Int32x16) ShiftLeftAndFillUpperFrom(y Int32x16, z Int32x16) Int32x16
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x2) ShiftLeftAndFillUpperFrom(y Int64x2, z Int64x2) Int64x2
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x4) ShiftLeftAndFillUpperFrom(y Int64x4, z Int64x4) Int64x4
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x8) ShiftLeftAndFillUpperFrom(y Int64x8, z Int64x8) Int64x8
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x8) ShiftLeftAndFillUpperFrom(y Uint16x8, z Uint16x8) Uint16x8
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x16) ShiftLeftAndFillUpperFrom(y Uint16x16, z Uint16x16) Uint16x16
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x32) ShiftLeftAndFillUpperFrom(y Uint16x32, z Uint16x32) Uint16x32
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x4) ShiftLeftAndFillUpperFrom(y Uint32x4, z Uint32x4) Uint32x4
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x8) ShiftLeftAndFillUpperFrom(y Uint32x8, z Uint32x8) Uint32x8
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x16) ShiftLeftAndFillUpperFrom(y Uint32x16, z Uint32x16) Uint32x16
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x2) ShiftLeftAndFillUpperFrom(y Uint64x2, z Uint64x2) Uint64x2
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x4) ShiftLeftAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4
 
 // ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x8) ShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8
 
 /* ShiftLeftAndFillUpperFromMasked */
@@ -8030,218 +8030,218 @@ func (x Uint64x8) ShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Int16x8) ShiftLeftAndFillUpperFromMasked(y Int16x8, z Int16x8, u Mask16x8) Int16x8
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Int16x16) ShiftLeftAndFillUpperFromMasked(y Int16x16, z Int16x16, u Mask16x16) Int16x16
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Int16x32) ShiftLeftAndFillUpperFromMasked(y Int16x32, z Int16x32, u Mask16x32) Int16x32
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Int32x4) ShiftLeftAndFillUpperFromMasked(y Int32x4, z Int32x4, u Mask32x4) Int32x4
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Int32x8) ShiftLeftAndFillUpperFromMasked(y Int32x8, z Int32x8, u Mask32x8) Int32x8
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Int32x16) ShiftLeftAndFillUpperFromMasked(y Int32x16, z Int32x16, u Mask32x16) Int32x16
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x2) ShiftLeftAndFillUpperFromMasked(y Int64x2, z Int64x2, u Mask64x2) Int64x2
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x4) ShiftLeftAndFillUpperFromMasked(y Int64x4, z Int64x4, u Mask64x4) Int64x4
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x8) ShiftLeftAndFillUpperFromMasked(y Int64x8, z Int64x8, u Mask64x8) Int64x8
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x8) ShiftLeftAndFillUpperFromMasked(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x16) ShiftLeftAndFillUpperFromMasked(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x32) ShiftLeftAndFillUpperFromMasked(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x4) ShiftLeftAndFillUpperFromMasked(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x8) ShiftLeftAndFillUpperFromMasked(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x16) ShiftLeftAndFillUpperFromMasked(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x2) ShiftLeftAndFillUpperFromMasked(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x4) ShiftLeftAndFillUpperFromMasked(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4
 
 // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8
 
 /* ShiftLeftMasked */
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Int16x8) ShiftLeftMasked(y Int16x8, z Mask16x8) Int16x8
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Int16x16) ShiftLeftMasked(y Int16x16, z Mask16x16) Int16x16
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Int16x32) ShiftLeftMasked(y Int16x32, z Mask16x32) Int16x32
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVD, CPU Feature: AVX512EVEX
+// Asm: VPSLLVD, CPU Feature: AVX512F
 func (x Int32x4) ShiftLeftMasked(y Int32x4, z Mask32x4) Int32x4
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVD, CPU Feature: AVX512EVEX
+// Asm: VPSLLVD, CPU Feature: AVX512F
 func (x Int32x8) ShiftLeftMasked(y Int32x8, z Mask32x8) Int32x8
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVD, CPU Feature: AVX512EVEX
+// Asm: VPSLLVD, CPU Feature: AVX512F
 func (x Int32x16) ShiftLeftMasked(y Int32x16, z Mask32x16) Int32x16
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLVQ, CPU Feature: AVX512F
 func (x Int64x2) ShiftLeftMasked(y Int64x2, z Mask64x2) Int64x2
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLVQ, CPU Feature: AVX512F
 func (x Int64x4) ShiftLeftMasked(y Int64x4, z Mask64x4) Int64x4
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLVQ, CPU Feature: AVX512F
 func (x Int64x8) ShiftLeftMasked(y Int64x8, z Mask64x8) Int64x8
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Uint16x8) ShiftLeftMasked(y Uint16x8, z Mask16x8) Uint16x8
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Uint16x16) ShiftLeftMasked(y Uint16x16, z Mask16x16) Uint16x16
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVW, CPU Feature: AVX512EVEX
+// Asm: VPSLLVW, CPU Feature: AVX512BW
 func (x Uint16x32) ShiftLeftMasked(y Uint16x32, z Mask16x32) Uint16x32
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVD, CPU Feature: AVX512EVEX
+// Asm: VPSLLVD, CPU Feature: AVX512F
 func (x Uint32x4) ShiftLeftMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVD, CPU Feature: AVX512EVEX
+// Asm: VPSLLVD, CPU Feature: AVX512F
 func (x Uint32x8) ShiftLeftMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVD, CPU Feature: AVX512EVEX
+// Asm: VPSLLVD, CPU Feature: AVX512F
 func (x Uint32x16) ShiftLeftMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLVQ, CPU Feature: AVX512F
 func (x Uint64x2) ShiftLeftMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLVQ, CPU Feature: AVX512F
 func (x Uint64x4) ShiftLeftMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSLLVQ, CPU Feature: AVX512F
 func (x Uint64x8) ShiftLeftMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* ShiftRight */
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512BW
 func (x Int16x8) ShiftRight(y Int16x8) Int16x8
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512BW
 func (x Int16x16) ShiftRight(y Int16x16) Int16x16
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512BW
 func (x Int16x32) ShiftRight(y Int16x32) Int16x32
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
@@ -8256,37 +8256,37 @@ func (x Int32x8) ShiftRight(y Int32x8) Int32x8
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512F
 func (x Int32x16) ShiftRight(y Int32x16) Int32x16
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512F
 func (x Int64x2) ShiftRight(y Int64x2) Int64x2
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512F
 func (x Int64x4) ShiftRight(y Int64x4) Int64x4
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512F
 func (x Int64x8) ShiftRight(y Int64x8) Int64x8
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRLVW, CPU Feature: AVX512BW
 func (x Uint16x8) ShiftRight(y Uint16x8) Uint16x8
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRLVW, CPU Feature: AVX512BW
 func (x Uint16x16) ShiftRight(y Uint16x16) Uint16x16
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRLVW, CPU Feature: AVX512BW
 func (x Uint16x32) ShiftRight(y Uint16x32) Uint16x32
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
@@ -8301,7 +8301,7 @@ func (x Uint32x8) ShiftRight(y Uint32x8) Uint32x8
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRLVD, CPU Feature: AVX512F
 func (x Uint32x16) ShiftRight(y Uint32x16) Uint32x16
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
@@ -8316,7 +8316,7 @@ func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRLVQ, CPU Feature: AVX512F
 func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8
 
 /* ShiftRightAndFillUpperFrom */
@@ -8324,109 +8324,109 @@ func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Int16x8) ShiftRightAndFillUpperFrom(y Int16x8, z Int16x8) Int16x8
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Int16x16) ShiftRightAndFillUpperFrom(y Int16x16, z Int16x16) Int16x16
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Int16x32) ShiftRightAndFillUpperFrom(y Int16x32, z Int16x32) Int16x32
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Int32x4) ShiftRightAndFillUpperFrom(y Int32x4, z Int32x4) Int32x4
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Int32x8) ShiftRightAndFillUpperFrom(y Int32x8, z Int32x8) Int32x8
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Int32x16) ShiftRightAndFillUpperFrom(y Int32x16, z Int32x16) Int32x16
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x2) ShiftRightAndFillUpperFrom(y Int64x2, z Int64x2) Int64x2
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x4) ShiftRightAndFillUpperFrom(y Int64x4, z Int64x4) Int64x4
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x8) ShiftRightAndFillUpperFrom(y Int64x8, z Int64x8) Int64x8
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x8) ShiftRightAndFillUpperFrom(y Uint16x8, z Uint16x8) Uint16x8
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x16) ShiftRightAndFillUpperFrom(y Uint16x16, z Uint16x16) Uint16x16
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x32) ShiftRightAndFillUpperFrom(y Uint16x32, z Uint16x32) Uint16x32
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x4) ShiftRightAndFillUpperFrom(y Uint32x4, z Uint32x4) Uint32x4
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x8) ShiftRightAndFillUpperFrom(y Uint32x8, z Uint32x8) Uint32x8
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x16) ShiftRightAndFillUpperFrom(y Uint32x16, z Uint32x16) Uint32x16
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x2) ShiftRightAndFillUpperFrom(y Uint64x2, z Uint64x2) Uint64x2
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x4) ShiftRightAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4
 
 // ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x8) ShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8
 
 /* ShiftRightAndFillUpperFromMasked */
@@ -8434,201 +8434,201 @@ func (x Uint64x8) ShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Int16x8) ShiftRightAndFillUpperFromMasked(y Int16x8, z Int16x8, u Mask16x8) Int16x8
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Int16x16) ShiftRightAndFillUpperFromMasked(y Int16x16, z Int16x16, u Mask16x16) Int16x16
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Int16x32) ShiftRightAndFillUpperFromMasked(y Int16x32, z Int16x32, u Mask16x32) Int16x32
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Int32x4) ShiftRightAndFillUpperFromMasked(y Int32x4, z Int32x4, u Mask32x4) Int32x4
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Int32x8) ShiftRightAndFillUpperFromMasked(y Int32x8, z Int32x8, u Mask32x8) Int32x8
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Int32x16) ShiftRightAndFillUpperFromMasked(y Int32x16, z Int32x16, u Mask32x16) Int32x16
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x2) ShiftRightAndFillUpperFromMasked(y Int64x2, z Int64x2, u Mask64x2) Int64x2
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x4) ShiftRightAndFillUpperFromMasked(y Int64x4, z Int64x4, u Mask64x4) Int64x4
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Int64x8) ShiftRightAndFillUpperFromMasked(y Int64x8, z Int64x8, u Mask64x8) Int64x8
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x8) ShiftRightAndFillUpperFromMasked(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x16) ShiftRightAndFillUpperFromMasked(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
 func (x Uint16x32) ShiftRightAndFillUpperFromMasked(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x4) ShiftRightAndFillUpperFromMasked(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x8) ShiftRightAndFillUpperFromMasked(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
 func (x Uint32x16) ShiftRightAndFillUpperFromMasked(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x2) ShiftRightAndFillUpperFromMasked(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x4) ShiftRightAndFillUpperFromMasked(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4
 
 // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
 func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8
 
 /* ShiftRightMasked */
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512BW
 func (x Int16x8) ShiftRightMasked(y Int16x8, z Mask16x8) Int16x8
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512BW
 func (x Int16x16) ShiftRightMasked(y Int16x16, z Mask16x16) Int16x16
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512BW
 func (x Int16x32) ShiftRightMasked(y Int16x32, z Mask16x32) Int16x32
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512F
 func (x Int32x4) ShiftRightMasked(y Int32x4, z Mask32x4) Int32x4
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512F
 func (x Int32x8) ShiftRightMasked(y Int32x8, z Mask32x8) Int32x8
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512F
 func (x Int32x16) ShiftRightMasked(y Int32x16, z Mask32x16) Int32x16
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512F
 func (x Int64x2) ShiftRightMasked(y Int64x2, z Mask64x2) Int64x2
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512F
 func (x Int64x4) ShiftRightMasked(y Int64x4, z Mask64x4) Int64x4
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512F
 func (x Int64x8) ShiftRightMasked(y Int64x8, z Mask64x8) Int64x8
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRLVW, CPU Feature: AVX512BW
 func (x Uint16x8) ShiftRightMasked(y Uint16x8, z Mask16x8) Uint16x8
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRLVW, CPU Feature: AVX512BW
 func (x Uint16x16) ShiftRightMasked(y Uint16x16, z Mask16x16) Uint16x16
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRLVW, CPU Feature: AVX512BW
 func (x Uint16x32) ShiftRightMasked(y Uint16x32, z Mask16x32) Uint16x32
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRLVD, CPU Feature: AVX512F
 func (x Uint32x4) ShiftRightMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRLVD, CPU Feature: AVX512F
 func (x Uint32x8) ShiftRightMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRLVD, CPU Feature: AVX512F
 func (x Uint32x16) ShiftRightMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRLVQ, CPU Feature: AVX512F
 func (x Uint64x2) ShiftRightMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRLVQ, CPU Feature: AVX512F
 func (x Uint64x4) ShiftRightMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRLVQ, CPU Feature: AVX512F
 func (x Uint64x8) ShiftRightMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* Sign */
@@ -8683,7 +8683,7 @@ func (x Float32x8) Sqrt() Float32x8
 
 // Sqrt computes the square root of each element.
 //
-// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+// Asm: VSQRTPS, CPU Feature: AVX512F
 func (x Float32x16) Sqrt() Float32x16
 
 // Sqrt computes the square root of each element.
@@ -8698,39 +8698,39 @@ func (x Float64x4) Sqrt() Float64x4
 
 // Sqrt computes the square root of each element.
 //
-// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+// Asm: VSQRTPD, CPU Feature: AVX512F
 func (x Float64x8) Sqrt() Float64x8
 
 /* SqrtMasked */
 
 // SqrtMasked computes the square root of each element.
 //
-// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+// Asm: VSQRTPS, CPU Feature: AVX512F
 func (x Float32x4) SqrtMasked(y Mask32x4) Float32x4
 
 // SqrtMasked computes the square root of each element.
 //
-// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+// Asm: VSQRTPS, CPU Feature: AVX512F
 func (x Float32x8) SqrtMasked(y Mask32x8) Float32x8
 
 // SqrtMasked computes the square root of each element.
 //
-// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+// Asm: VSQRTPS, CPU Feature: AVX512F
 func (x Float32x16) SqrtMasked(y Mask32x16) Float32x16
 
 // SqrtMasked computes the square root of each element.
 //
-// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+// Asm: VSQRTPD, CPU Feature: AVX512F
 func (x Float64x2) SqrtMasked(y Mask64x2) Float64x2
 
 // SqrtMasked computes the square root of each element.
 //
-// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+// Asm: VSQRTPD, CPU Feature: AVX512F
 func (x Float64x4) SqrtMasked(y Mask64x4) Float64x4
 
 // SqrtMasked computes the square root of each element.
 //
-// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+// Asm: VSQRTPD, CPU Feature: AVX512F
 func (x Float64x8) SqrtMasked(y Mask64x8) Float64x8
 
 /* Sub */
@@ -8747,7 +8747,7 @@ func (x Float32x8) Sub(y Float32x8) Float32x8
 
 // Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VSUBPS, CPU Feature: AVX512EVEX
+// Asm: VSUBPS, CPU Feature: AVX512F
 func (x Float32x16) Sub(y Float32x16) Float32x16
 
 // Sub subtracts corresponding elements of two vectors.
@@ -8762,7 +8762,7 @@ func (x Float64x4) Sub(y Float64x4) Float64x4
 
 // Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VSUBPD, CPU Feature: AVX512EVEX
+// Asm: VSUBPD, CPU Feature: AVX512F
 func (x Float64x8) Sub(y Float64x8) Float64x8
 
 // Sub subtracts corresponding elements of two vectors.
@@ -8777,7 +8777,7 @@ func (x Int8x32) Sub(y Int8x32) Int8x32
 
 // Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
+// Asm: VPSUBB, CPU Feature: AVX512BW
 func (x Int8x64) Sub(y Int8x64) Int8x64
 
 // Sub subtracts corresponding elements of two vectors.
@@ -8792,7 +8792,7 @@ func (x Int16x16) Sub(y Int16x16) Int16x16
 
 // Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
+// Asm: VPSUBW, CPU Feature: AVX512BW
 func (x Int16x32) Sub(y Int16x32) Int16x32
 
 // Sub subtracts corresponding elements of two vectors.
@@ -8807,7 +8807,7 @@ func (x Int32x8) Sub(y Int32x8) Int32x8
 
 // Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
+// Asm: VPSUBD, CPU Feature: AVX512F
 func (x Int32x16) Sub(y Int32x16) Int32x16
 
 // Sub subtracts corresponding elements of two vectors.
@@ -8822,7 +8822,7 @@ func (x Int64x4) Sub(y Int64x4) Int64x4
 
 // Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+// Asm: VPSUBQ, CPU Feature: AVX512F
 func (x Int64x8) Sub(y Int64x8) Int64x8
 
 // Sub subtracts corresponding elements of two vectors.
@@ -8837,7 +8837,7 @@ func (x Uint8x32) Sub(y Uint8x32) Uint8x32
 
 // Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
+// Asm: VPSUBB, CPU Feature: AVX512BW
 func (x Uint8x64) Sub(y Uint8x64) Uint8x64
 
 // Sub subtracts corresponding elements of two vectors.
@@ -8852,7 +8852,7 @@ func (x Uint16x16) Sub(y Uint16x16) Uint16x16
 
 // Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
+// Asm: VPSUBW, CPU Feature: AVX512BW
 func (x Uint16x32) Sub(y Uint16x32) Uint16x32
 
 // Sub subtracts corresponding elements of two vectors.
@@ -8867,7 +8867,7 @@ func (x Uint32x8) Sub(y Uint32x8) Uint32x8
 
 // Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
+// Asm: VPSUBD, CPU Feature: AVX512F
 func (x Uint32x16) Sub(y Uint32x16) Uint32x16
 
 // Sub subtracts corresponding elements of two vectors.
@@ -8882,159 +8882,159 @@ func (x Uint64x4) Sub(y Uint64x4) Uint64x4
 
 // Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+// Asm: VPSUBQ, CPU Feature: AVX512F
 func (x Uint64x8) Sub(y Uint64x8) Uint64x8
 
 /* SubMasked */
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VSUBPS, CPU Feature: AVX512EVEX
+// Asm: VSUBPS, CPU Feature: AVX512F
 func (x Float32x4) SubMasked(y Float32x4, z Mask32x4) Float32x4
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VSUBPS, CPU Feature: AVX512EVEX
+// Asm: VSUBPS, CPU Feature: AVX512F
 func (x Float32x8) SubMasked(y Float32x8, z Mask32x8) Float32x8
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VSUBPS, CPU Feature: AVX512EVEX
+// Asm: VSUBPS, CPU Feature: AVX512F
 func (x Float32x16) SubMasked(y Float32x16, z Mask32x16) Float32x16
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VSUBPD, CPU Feature: AVX512EVEX
+// Asm: VSUBPD, CPU Feature: AVX512F
 func (x Float64x2) SubMasked(y Float64x2, z Mask64x2) Float64x2
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VSUBPD, CPU Feature: AVX512EVEX
+// Asm: VSUBPD, CPU Feature: AVX512F
 func (x Float64x4) SubMasked(y Float64x4, z Mask64x4) Float64x4
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VSUBPD, CPU Feature: AVX512EVEX
+// Asm: VSUBPD, CPU Feature: AVX512F
 func (x Float64x8) SubMasked(y Float64x8, z Mask64x8) Float64x8
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
+// Asm: VPSUBB, CPU Feature: AVX512BW
 func (x Int8x16) SubMasked(y Int8x16, z Mask8x16) Int8x16
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
+// Asm: VPSUBB, CPU Feature: AVX512BW
 func (x Int8x32) SubMasked(y Int8x32, z Mask8x32) Int8x32
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
+// Asm: VPSUBB, CPU Feature: AVX512BW
 func (x Int8x64) SubMasked(y Int8x64, z Mask8x64) Int8x64
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
+// Asm: VPSUBW, CPU Feature: AVX512BW
 func (x Int16x8) SubMasked(y Int16x8, z Mask16x8) Int16x8
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
+// Asm: VPSUBW, CPU Feature: AVX512BW
 func (x Int16x16) SubMasked(y Int16x16, z Mask16x16) Int16x16
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
+// Asm: VPSUBW, CPU Feature: AVX512BW
 func (x Int16x32) SubMasked(y Int16x32, z Mask16x32) Int16x32
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
+// Asm: VPSUBD, CPU Feature: AVX512F
 func (x Int32x4) SubMasked(y Int32x4, z Mask32x4) Int32x4
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
+// Asm: VPSUBD, CPU Feature: AVX512F
 func (x Int32x8) SubMasked(y Int32x8, z Mask32x8) Int32x8
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
+// Asm: VPSUBD, CPU Feature: AVX512F
 func (x Int32x16) SubMasked(y Int32x16, z Mask32x16) Int32x16
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+// Asm: VPSUBQ, CPU Feature: AVX512F
 func (x Int64x2) SubMasked(y Int64x2, z Mask64x2) Int64x2
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+// Asm: VPSUBQ, CPU Feature: AVX512F
 func (x Int64x4) SubMasked(y Int64x4, z Mask64x4) Int64x4
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+// Asm: VPSUBQ, CPU Feature: AVX512F
 func (x Int64x8) SubMasked(y Int64x8, z Mask64x8) Int64x8
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
+// Asm: VPSUBB, CPU Feature: AVX512BW
 func (x Uint8x16) SubMasked(y Uint8x16, z Mask8x16) Uint8x16
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
+// Asm: VPSUBB, CPU Feature: AVX512BW
 func (x Uint8x32) SubMasked(y Uint8x32, z Mask8x32) Uint8x32
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBB, CPU Feature: AVX512EVEX
+// Asm: VPSUBB, CPU Feature: AVX512BW
 func (x Uint8x64) SubMasked(y Uint8x64, z Mask8x64) Uint8x64
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
+// Asm: VPSUBW, CPU Feature: AVX512BW
 func (x Uint16x8) SubMasked(y Uint16x8, z Mask16x8) Uint16x8
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
+// Asm: VPSUBW, CPU Feature: AVX512BW
 func (x Uint16x16) SubMasked(y Uint16x16, z Mask16x16) Uint16x16
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
+// Asm: VPSUBW, CPU Feature: AVX512BW
 func (x Uint16x32) SubMasked(y Uint16x32, z Mask16x32) Uint16x32
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
+// Asm: VPSUBD, CPU Feature: AVX512F
 func (x Uint32x4) SubMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
+// Asm: VPSUBD, CPU Feature: AVX512F
 func (x Uint32x8) SubMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
+// Asm: VPSUBD, CPU Feature: AVX512F
 func (x Uint32x16) SubMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+// Asm: VPSUBQ, CPU Feature: AVX512F
 func (x Uint64x2) SubMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+// Asm: VPSUBQ, CPU Feature: AVX512F
 func (x Uint64x4) SubMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // SubMasked subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512EVEX
+// Asm: VPSUBQ, CPU Feature: AVX512F
 func (x Uint64x8) SubMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 /* Trunc */
@@ -9065,42 +9065,42 @@ func (x Float64x4) Trunc() Float64x4
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x4) TruncWithPrecision(prec uint8) Float32x4
 
 // TruncWithPrecision truncates elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x8) TruncWithPrecision(prec uint8) Float32x8
 
 // TruncWithPrecision truncates elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x16) TruncWithPrecision(prec uint8) Float32x16
 
 // TruncWithPrecision truncates elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x2) TruncWithPrecision(prec uint8) Float64x2
 
 // TruncWithPrecision truncates elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x4) TruncWithPrecision(prec uint8) Float64x4
 
 // TruncWithPrecision truncates elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x8) TruncWithPrecision(prec uint8) Float64x8
 
 /* TruncWithPrecisionMasked */
@@ -9109,106 +9109,106 @@ func (x Float64x8) TruncWithPrecision(prec uint8) Float64x8
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x4) TruncWithPrecisionMasked(prec uint8, y Mask32x4) Float32x4
 
 // TruncWithPrecisionMasked truncates elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x8) TruncWithPrecisionMasked(prec uint8, y Mask32x8) Float32x8
 
 // TruncWithPrecisionMasked truncates elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
 func (x Float32x16) TruncWithPrecisionMasked(prec uint8, y Mask32x16) Float32x16
 
 // TruncWithPrecisionMasked truncates elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x2) TruncWithPrecisionMasked(prec uint8, y Mask64x2) Float64x2
 
 // TruncWithPrecisionMasked truncates elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x4) TruncWithPrecisionMasked(prec uint8, y Mask64x4) Float64x4
 
 // TruncWithPrecisionMasked truncates elements with specified precision.
 //
 // prec is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
 func (x Float64x8) TruncWithPrecisionMasked(prec uint8, y Mask64x8) Float64x8
 
 /* UnsignedSignedQuadDotProdAccumulate */
 
 // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+// Asm: VPDPBUSD, CPU Feature: AVXVNNI
 func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Int32x4
 
 // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+// Asm: VPDPBUSD, CPU Feature: AVXVNNI
 func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Int32x8
 
 // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
 func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Int32x16
 
 // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+// Asm: VPDPBUSD, CPU Feature: AVXVNNI
 func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Uint32x4
 
 // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+// Asm: VPDPBUSD, CPU Feature: AVXVNNI
 func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uint32x8
 
 // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
 func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
 
 /* UnsignedSignedQuadDotProdAccumulateMasked */
 
 // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
 func (x Int32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
 
 // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
 func (x Int32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
 
 // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
 func (x Int32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
 
 // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
 func (x Uint32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
 
 // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
 func (x Uint32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
 
 // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
 //
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
 func (x Uint32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
 
 /* Xor */
@@ -9245,7 +9245,7 @@ func (x Int32x8) Xor(y Int32x8) Int32x8
 
-// Xor performs a masked bitwise XOR operation between two vectors.
+// Xor performs a bitwise XOR operation between two vectors.
 //
-// Asm: VPXORD, CPU Feature: AVX512EVEX
+// Asm: VPXORD, CPU Feature: AVX512F
 func (x Int32x16) Xor(y Int32x16) Int32x16
 
 // Xor performs a bitwise XOR operation between two vectors.
@@ -9260,7 +9260,7 @@ func (x Int64x4) Xor(y Int64x4) Int64x4
 
-// Xor performs a masked bitwise XOR operation between two vectors.
+// Xor performs a bitwise XOR operation between two vectors.
 //
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
+// Asm: VPXORQ, CPU Feature: AVX512F
 func (x Int64x8) Xor(y Int64x8) Int64x8
 
 // Xor performs a bitwise XOR operation between two vectors.
@@ -9295,7 +9295,7 @@ func (x Uint32x8) Xor(y Uint32x8) Uint32x8
 
-// Xor performs a masked bitwise XOR operation between two vectors.
+// Xor performs a bitwise XOR operation between two vectors.
 //
-// Asm: VPXORD, CPU Feature: AVX512EVEX
+// Asm: VPXORD, CPU Feature: AVX512F
 func (x Uint32x16) Xor(y Uint32x16) Uint32x16
 
 // Xor performs a bitwise XOR operation between two vectors.
@@ -9310,69 +9310,69 @@ func (x Uint64x4) Xor(y Uint64x4) Uint64x4
 
-// Xor performs a masked bitwise XOR operation between two vectors.
+// Xor performs a bitwise XOR operation between two vectors.
 //
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
+// Asm: VPXORQ, CPU Feature: AVX512F
 func (x Uint64x8) Xor(y Uint64x8) Uint64x8
 
 /* XorMasked */
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORD, CPU Feature: AVX512EVEX
+// Asm: VPXORD, CPU Feature: AVX512F
 func (x Int32x4) XorMasked(y Int32x4, z Mask32x4) Int32x4
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORD, CPU Feature: AVX512EVEX
+// Asm: VPXORD, CPU Feature: AVX512F
 func (x Int32x8) XorMasked(y Int32x8, z Mask32x8) Int32x8
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORD, CPU Feature: AVX512EVEX
+// Asm: VPXORD, CPU Feature: AVX512F
 func (x Int32x16) XorMasked(y Int32x16, z Mask32x16) Int32x16
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
+// Asm: VPXORQ, CPU Feature: AVX512F
 func (x Int64x2) XorMasked(y Int64x2, z Mask64x2) Int64x2
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
+// Asm: VPXORQ, CPU Feature: AVX512F
 func (x Int64x4) XorMasked(y Int64x4, z Mask64x4) Int64x4
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
+// Asm: VPXORQ, CPU Feature: AVX512F
 func (x Int64x8) XorMasked(y Int64x8, z Mask64x8) Int64x8
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORD, CPU Feature: AVX512EVEX
+// Asm: VPXORD, CPU Feature: AVX512F
 func (x Uint32x4) XorMasked(y Uint32x4, z Mask32x4) Uint32x4
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORD, CPU Feature: AVX512EVEX
+// Asm: VPXORD, CPU Feature: AVX512F
 func (x Uint32x8) XorMasked(y Uint32x8, z Mask32x8) Uint32x8
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORD, CPU Feature: AVX512EVEX
+// Asm: VPXORD, CPU Feature: AVX512F
 func (x Uint32x16) XorMasked(y Uint32x16, z Mask32x16) Uint32x16
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
+// Asm: VPXORQ, CPU Feature: AVX512F
 func (x Uint64x2) XorMasked(y Uint64x2, z Mask64x2) Uint64x2
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
+// Asm: VPXORQ, CPU Feature: AVX512F
 func (x Uint64x4) XorMasked(y Uint64x4, z Mask64x4) Uint64x4
 
 // XorMasked performs a masked bitwise XOR operation between two vectors.
 //
-// Asm: VPXORQ, CPU Feature: AVX512EVEX
+// Asm: VPXORQ, CPU Feature: AVX512F
 func (x Uint64x8) XorMasked(y Uint64x8, z Mask64x8) Uint64x8
 
 // Float64x2 converts from Float32x4 to Float64x2