// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.

//go:build goexperiment.simd

package simd

/* Absolute */

// Absolute computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX
func (x Int8x16) Absolute() Int8x16

// Absolute computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX2
func (x Int8x32) Absolute() Int8x32

// Absolute computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX512BW
func (x Int8x64) Absolute() Int8x64

// Absolute computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX
func (x Int16x8) Absolute() Int16x8

// Absolute computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX2
func (x Int16x16) Absolute() Int16x16

// Absolute computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX512BW
func (x Int16x32) Absolute() Int16x32

// Absolute computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX
func (x Int32x4) Absolute() Int32x4

// Absolute computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX2
func (x Int32x8) Absolute() Int32x8

// Absolute computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX512F
func (x Int32x16) Absolute() Int32x16

// Absolute computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512F
func (x Int64x2) Absolute() Int64x2

// Absolute computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512F
func (x Int64x4) Absolute() Int64x4

// Absolute computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512F
func (x Int64x8) Absolute() Int64x8

/* AbsoluteMasked */

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSB, CPU Feature: AVX512BW
func (x Int8x16) AbsoluteMasked(mask Mask8x16) Int8x16

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSB, CPU Feature: AVX512BW
func (x Int8x32) AbsoluteMasked(mask Mask8x32) Int8x32

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSB, CPU Feature: AVX512BW
func (x Int8x64) AbsoluteMasked(mask Mask8x64) Int8x64

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSW, CPU Feature: AVX512BW
func (x Int16x8) AbsoluteMasked(mask Mask16x8) Int16x8

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSW, CPU Feature: AVX512BW
func (x Int16x16) AbsoluteMasked(mask Mask16x16) Int16x16

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSW, CPU Feature: AVX512BW
func (x Int16x32) AbsoluteMasked(mask Mask16x32) Int16x32

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSD, CPU Feature: AVX512F
func (x Int32x4) AbsoluteMasked(mask Mask32x4) Int32x4

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSD, CPU Feature: AVX512F
func (x Int32x8) AbsoluteMasked(mask Mask32x8) Int32x8

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSD, CPU Feature: AVX512F
func (x Int32x16) AbsoluteMasked(mask Mask32x16) Int32x16

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSQ, CPU Feature: AVX512F
func (x Int64x2) AbsoluteMasked(mask Mask64x2) Int64x2

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSQ, CPU Feature: AVX512F
func (x Int64x4) AbsoluteMasked(mask Mask64x4) Int64x4

// AbsoluteMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSQ, CPU Feature: AVX512F
func (x Int64x8) AbsoluteMasked(mask Mask64x8) Int64x8
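// A minimal usage sketch for the declarations above (illustrative only, not
// generated code): load lanes from a slice, take per-lane absolute values,
// and store them back. LoadInt8x16Slice and StoreSlice are assumed to be the
// package's slice load/store helpers; they are not declared in this section.
func exampleAbsolute(data []int8) {
	v := LoadInt8x16Slice(data) // load the first 16 lanes of data
	v = v.Absolute()            // |x| in every lane
	v.StoreSlice(data)          // write the 16 results back
}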
/* Add */

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX
func (x Float32x4) Add(y Float32x4) Float32x4

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX
func (x Float32x8) Add(y Float32x8) Float32x8

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPS, CPU Feature: AVX512F
func (x Float32x16) Add(y Float32x16) Float32x16

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX
func (x Float64x2) Add(y Float64x2) Float64x2

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX
func (x Float64x4) Add(y Float64x4) Float64x4

// Add adds corresponding elements of two vectors.
//
// Asm: VADDPD, CPU Feature: AVX512F
func (x Float64x8) Add(y Float64x8) Float64x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX
func (x Int8x16) Add(y Int8x16) Int8x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX2
func (x Int8x32) Add(y Int8x32) Int8x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512BW
func (x Int8x64) Add(y Int8x64) Int8x64

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX
func (x Int16x8) Add(y Int16x8) Int16x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX2
func (x Int16x16) Add(y Int16x16) Int16x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512BW
func (x Int16x32) Add(y Int16x32) Int16x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX
func (x Int32x4) Add(y Int32x4) Int32x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX2
func (x Int32x8) Add(y Int32x8) Int32x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512F
func (x Int32x16) Add(y Int32x16) Int32x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX
func (x Int64x2) Add(y Int64x2) Int64x2

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX2
func (x Int64x4) Add(y Int64x4) Int64x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512F
func (x Int64x8) Add(y Int64x8) Int64x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX
func (x Uint8x16) Add(y Uint8x16) Uint8x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX2
func (x Uint8x32) Add(y Uint8x32) Uint8x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDB, CPU Feature: AVX512BW
func (x Uint8x64) Add(y Uint8x64) Uint8x64

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX
func (x Uint16x8) Add(y Uint16x8) Uint16x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX2
func (x Uint16x16) Add(y Uint16x16) Uint16x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDW, CPU Feature: AVX512BW
func (x Uint16x32) Add(y Uint16x32) Uint16x32

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX
func (x Uint32x4) Add(y Uint32x4) Uint32x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX2
func (x Uint32x8) Add(y Uint32x8) Uint32x8

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512F
func (x Uint32x16) Add(y Uint32x16) Uint32x16

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX
func (x Uint64x2) Add(y Uint64x2) Uint64x2

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX2
func (x Uint64x4) Add(y Uint64x4) Uint64x4

// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512F
func (x Uint64x8) Add(y Uint64x8) Uint64x8

/* AddMasked */

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VADDPS, CPU Feature: AVX512F
func (x Float32x4) AddMasked(y Float32x4, mask Mask32x4) Float32x4

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VADDPS, CPU Feature: AVX512F
func (x Float32x8) AddMasked(y Float32x8, mask Mask32x8) Float32x8

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VADDPS, CPU Feature: AVX512F
func (x Float32x16) AddMasked(y Float32x16, mask Mask32x16) Float32x16

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VADDPD, CPU Feature: AVX512F
func (x Float64x2) AddMasked(y Float64x2, mask Mask64x2) Float64x2

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VADDPD, CPU Feature: AVX512F
func (x Float64x4) AddMasked(y Float64x4, mask Mask64x4) Float64x4

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VADDPD, CPU Feature: AVX512F
func (x Float64x8) AddMasked(y Float64x8, mask Mask64x8) Float64x8

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDB, CPU Feature: AVX512BW
func (x Int8x16) AddMasked(y Int8x16, mask Mask8x16) Int8x16

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDB, CPU Feature: AVX512BW
func (x Int8x32) AddMasked(y Int8x32, mask Mask8x32) Int8x32

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDB, CPU Feature: AVX512BW
func (x Int8x64) AddMasked(y Int8x64, mask Mask8x64) Int8x64

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDW, CPU Feature: AVX512BW
func (x Int16x8) AddMasked(y Int16x8, mask Mask16x8) Int16x8

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDW, CPU Feature: AVX512BW
func (x Int16x16) AddMasked(y Int16x16, mask Mask16x16) Int16x16

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDW, CPU Feature: AVX512BW
func (x Int16x32) AddMasked(y Int16x32, mask Mask16x32) Int16x32

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDD, CPU Feature: AVX512F
func (x Int32x4) AddMasked(y Int32x4, mask Mask32x4) Int32x4

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDD, CPU Feature: AVX512F
func (x Int32x8) AddMasked(y Int32x8, mask Mask32x8) Int32x8

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDD, CPU Feature: AVX512F
func (x Int32x16) AddMasked(y Int32x16, mask Mask32x16) Int32x16

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDQ, CPU Feature: AVX512F
func (x Int64x2) AddMasked(y Int64x2, mask Mask64x2) Int64x2

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDQ, CPU Feature: AVX512F
func (x Int64x4) AddMasked(y Int64x4, mask Mask64x4) Int64x4

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDQ, CPU Feature: AVX512F
func (x Int64x8) AddMasked(y Int64x8, mask Mask64x8) Int64x8

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDB, CPU Feature: AVX512BW
func (x Uint8x16) AddMasked(y Uint8x16, mask Mask8x16) Uint8x16

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDB, CPU Feature: AVX512BW
func (x Uint8x32) AddMasked(y Uint8x32, mask Mask8x32) Uint8x32

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDB, CPU Feature: AVX512BW
func (x Uint8x64) AddMasked(y Uint8x64, mask Mask8x64) Uint8x64

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDW, CPU Feature: AVX512BW
func (x Uint16x8) AddMasked(y Uint16x8, mask Mask16x8) Uint16x8

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDW, CPU Feature: AVX512BW
func (x Uint16x16) AddMasked(y Uint16x16, mask Mask16x16) Uint16x16

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDW, CPU Feature: AVX512BW
func (x Uint16x32) AddMasked(y Uint16x32, mask Mask16x32) Uint16x32

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDD, CPU Feature: AVX512F
func (x Uint32x4) AddMasked(y Uint32x4, mask Mask32x4) Uint32x4

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDD, CPU Feature: AVX512F
func (x Uint32x8) AddMasked(y Uint32x8, mask Mask32x8) Uint32x8

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDD, CPU Feature: AVX512F
func (x Uint32x16) AddMasked(y Uint32x16, mask Mask32x16) Uint32x16

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDQ, CPU Feature: AVX512F
func (x Uint64x2) AddMasked(y Uint64x2, mask Mask64x2) Uint64x2

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDQ, CPU Feature: AVX512F
func (x Uint64x4) AddMasked(y Uint64x4, mask Mask64x4) Uint64x4

// AddMasked adds corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDQ, CPU Feature: AVX512F
func (x Uint64x8) AddMasked(y Uint64x8, mask Mask64x8) Uint64x8
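// Masked variants compute a result only in the lanes selected by the mask.
// A sketch (illustrative only): add y into x solely where x is positive.
// LoadInt32x4Slice, StoreSlice, and a Greater comparison returning a
// Mask32x4 are assumed helpers, not declared in this section.
func exampleAddMasked(dst, add []int32) {
	x := LoadInt32x4Slice(dst)
	y := LoadInt32x4Slice(add)
	m := x.Greater(Int32x4{}) // mask selects lanes where x > 0
	x.AddMasked(y, m).StoreSlice(dst)
}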
/* AddSub */

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPS, CPU Feature: AVX
func (x Float32x4) AddSub(y Float32x4) Float32x4

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPS, CPU Feature: AVX
func (x Float32x8) AddSub(y Float32x8) Float32x8

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPD, CPU Feature: AVX
func (x Float64x2) AddSub(y Float64x2) Float64x2

// AddSub subtracts even elements and adds odd elements of two vectors.
//
// Asm: VADDSUBPD, CPU Feature: AVX
func (x Float64x4) AddSub(y Float64x4) Float64x4
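// AddSub alternates by lane index: even-indexed lanes compute x-y and
// odd-indexed lanes compute x+y. A worked sketch (illustrative only;
// LoadFloat32x4Slice and StoreSlice are assumed helpers):
func exampleAddSub() [4]float32 {
	x := LoadFloat32x4Slice([]float32{1, 2, 3, 4})
	y := LoadFloat32x4Slice([]float32{10, 10, 10, 10})
	var out [4]float32
	x.AddSub(y).StoreSlice(out[:]) // lanes: 1-10, 2+10, 3-10, 4+10 = -9, 12, -7, 14
	return out
}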
/* And */

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Int8x16) And(y Int8x16) Int8x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Int8x32) And(y Int8x32) Int8x32

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Int16x8) And(y Int16x8) Int16x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Int16x16) And(y Int16x16) Int16x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Int32x4) And(y Int32x4) Int32x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Int32x8) And(y Int32x8) Int32x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512F
func (x Int32x16) And(y Int32x16) Int32x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Int64x2) And(y Int64x2) Int64x2

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Int64x4) And(y Int64x4) Int64x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512F
func (x Int64x8) And(y Int64x8) Int64x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Uint8x16) And(y Uint8x16) Uint8x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Uint8x32) And(y Uint8x32) Uint8x32

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Uint16x8) And(y Uint16x8) Uint16x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Uint16x16) And(y Uint16x16) Uint16x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Uint32x4) And(y Uint32x4) Uint32x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Uint32x8) And(y Uint32x8) Uint32x8

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDD, CPU Feature: AVX512F
func (x Uint32x16) And(y Uint32x16) Uint32x16

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX
func (x Uint64x2) And(y Uint64x2) Uint64x2

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPAND, CPU Feature: AVX2
func (x Uint64x4) And(y Uint64x4) Uint64x4

// And performs a bitwise AND operation between two vectors.
//
// Asm: VPANDQ, CPU Feature: AVX512F
func (x Uint64x8) And(y Uint64x8) Uint64x8

/* AndMasked */

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDD, CPU Feature: AVX512F
func (x Int32x4) AndMasked(y Int32x4, mask Mask32x4) Int32x4

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDD, CPU Feature: AVX512F
func (x Int32x8) AndMasked(y Int32x8, mask Mask32x8) Int32x8

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDD, CPU Feature: AVX512F
func (x Int32x16) AndMasked(y Int32x16, mask Mask32x16) Int32x16

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDQ, CPU Feature: AVX512F
func (x Int64x2) AndMasked(y Int64x2, mask Mask64x2) Int64x2

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDQ, CPU Feature: AVX512F
func (x Int64x4) AndMasked(y Int64x4, mask Mask64x4) Int64x4

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDQ, CPU Feature: AVX512F
func (x Int64x8) AndMasked(y Int64x8, mask Mask64x8) Int64x8

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDD, CPU Feature: AVX512F
func (x Uint32x4) AndMasked(y Uint32x4, mask Mask32x4) Uint32x4

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDD, CPU Feature: AVX512F
func (x Uint32x8) AndMasked(y Uint32x8, mask Mask32x8) Uint32x8

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDD, CPU Feature: AVX512F
func (x Uint32x16) AndMasked(y Uint32x16, mask Mask32x16) Uint32x16

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDQ, CPU Feature: AVX512F
func (x Uint64x2) AndMasked(y Uint64x2, mask Mask64x2) Uint64x2

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDQ, CPU Feature: AVX512F
func (x Uint64x4) AndMasked(y Uint64x4, mask Mask64x4) Uint64x4

// AndMasked performs a bitwise AND operation between two vectors.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDQ, CPU Feature: AVX512F
func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8
/* AndNot */

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Int8x16) AndNot(y Int8x16) Int8x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Int8x32) AndNot(y Int8x32) Int8x32

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Int16x8) AndNot(y Int16x8) Int16x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Int16x16) AndNot(y Int16x16) Int16x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Int32x4) AndNot(y Int32x4) Int32x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Int32x8) AndNot(y Int32x8) Int32x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512F
func (x Int32x16) AndNot(y Int32x16) Int32x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Int64x2) AndNot(y Int64x2) Int64x2

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Int64x4) AndNot(y Int64x4) Int64x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
func (x Int64x8) AndNot(y Int64x8) Int64x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Uint8x16) AndNot(y Uint8x16) Uint8x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Uint8x32) AndNot(y Uint8x32) Uint8x32

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Uint16x8) AndNot(y Uint16x8) Uint16x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Uint16x16) AndNot(y Uint16x16) Uint16x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Uint32x4) AndNot(y Uint32x4) Uint32x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Uint32x8) AndNot(y Uint32x8) Uint32x8

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDND, CPU Feature: AVX512F
func (x Uint32x16) AndNot(y Uint32x16) Uint32x16

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX
func (x Uint64x2) AndNot(y Uint64x2) Uint64x2

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDN, CPU Feature: AVX2
func (x Uint64x4) AndNot(y Uint64x4) Uint64x4

// AndNot performs a bitwise x &^ y.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
func (x Uint64x8) AndNot(y Uint64x8) Uint64x8

/* AndNotMasked */

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDND, CPU Feature: AVX512F
func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDND, CPU Feature: AVX512F
func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDND, CPU Feature: AVX512F
func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDND, CPU Feature: AVX512F
func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDND, CPU Feature: AVX512F
func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDND, CPU Feature: AVX512F
func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4

// AndNotMasked performs a bitwise x &^ y.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPANDNQ, CPU Feature: AVX512F
func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8
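// AndNot is the vector form of Go's bit-clear operator: a bit of x survives
// only where the corresponding bit of y is zero. The scalar equivalent of a
// single uint32 lane, for reference:
func andNotLane(x, y uint32) uint32 {
	return x &^ y // identical to x & ^y
}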
/* ApproximateReciprocal */

// ApproximateReciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCPPS, CPU Feature: AVX
func (x Float32x4) ApproximateReciprocal() Float32x4

// ApproximateReciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCPPS, CPU Feature: AVX
func (x Float32x8) ApproximateReciprocal() Float32x8

// ApproximateReciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCP14PS, CPU Feature: AVX512F
func (x Float32x16) ApproximateReciprocal() Float32x16

// ApproximateReciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCP14PD, CPU Feature: AVX512F
func (x Float64x2) ApproximateReciprocal() Float64x2

// ApproximateReciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCP14PD, CPU Feature: AVX512F
func (x Float64x4) ApproximateReciprocal() Float64x4

// ApproximateReciprocal computes an approximate reciprocal of each element.
//
// Asm: VRCP14PD, CPU Feature: AVX512F
func (x Float64x8) ApproximateReciprocal() Float64x8

/* ApproximateReciprocalMasked */

// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRCP14PS, CPU Feature: AVX512F
func (x Float32x4) ApproximateReciprocalMasked(mask Mask32x4) Float32x4

// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRCP14PS, CPU Feature: AVX512F
func (x Float32x8) ApproximateReciprocalMasked(mask Mask32x8) Float32x8

// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRCP14PS, CPU Feature: AVX512F
func (x Float32x16) ApproximateReciprocalMasked(mask Mask32x16) Float32x16

// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRCP14PD, CPU Feature: AVX512F
func (x Float64x2) ApproximateReciprocalMasked(mask Mask64x2) Float64x2

// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRCP14PD, CPU Feature: AVX512F
func (x Float64x4) ApproximateReciprocalMasked(mask Mask64x4) Float64x4

// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRCP14PD, CPU Feature: AVX512F
func (x Float64x8) ApproximateReciprocalMasked(mask Mask64x8) Float64x8

/* ApproximateReciprocalOfSqrt */

// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRTPS, CPU Feature: AVX
func (x Float32x4) ApproximateReciprocalOfSqrt() Float32x4

// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRTPS, CPU Feature: AVX
func (x Float32x8) ApproximateReciprocalOfSqrt() Float32x8

// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PS, CPU Feature: AVX512F
func (x Float32x16) ApproximateReciprocalOfSqrt() Float32x16

// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512F
func (x Float64x2) ApproximateReciprocalOfSqrt() Float64x2

// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512F
func (x Float64x4) ApproximateReciprocalOfSqrt() Float64x4

// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512F
func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8

/* ApproximateReciprocalOfSqrtMasked */

// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRSQRT14PS, CPU Feature: AVX512F
func (x Float32x4) ApproximateReciprocalOfSqrtMasked(mask Mask32x4) Float32x4

// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRSQRT14PS, CPU Feature: AVX512F
func (x Float32x8) ApproximateReciprocalOfSqrtMasked(mask Mask32x8) Float32x8

// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRSQRT14PS, CPU Feature: AVX512F
func (x Float32x16) ApproximateReciprocalOfSqrtMasked(mask Mask32x16) Float32x16

// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512F
func (x Float64x2) ApproximateReciprocalOfSqrtMasked(mask Mask64x2) Float64x2

// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512F
func (x Float64x4) ApproximateReciprocalOfSqrtMasked(mask Mask64x4) Float64x4

// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512F
func (x Float64x8) ApproximateReciprocalOfSqrtMasked(mask Mask64x8) Float64x8
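// The approximate-reciprocal instructions trade accuracy for speed: VRCPPS
// and VRSQRTPS are specified to roughly 12 bits of relative precision, the
// AVX-512 VRCP14/VRSQRT14 forms to about 14 bits. One Newton-Raphson step,
// r' = r*(2 - x*r), roughly doubles that. A sketch (illustrative only;
// BroadcastFloat32x4, Mul, and Sub are assumed to exist elsewhere in the
// package and are not declared in this section):
func refinedReciprocal(x Float32x4) Float32x4 {
	r := x.ApproximateReciprocal() // low-precision estimate of 1/x
	two := BroadcastFloat32x4(2)
	return r.Mul(two.Sub(x.Mul(r))) // one refinement step
}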
/* Average */

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX
func (x Uint8x16) Average(y Uint8x16) Uint8x16

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX2
func (x Uint8x32) Average(y Uint8x32) Uint8x32

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGB, CPU Feature: AVX512BW
func (x Uint8x64) Average(y Uint8x64) Uint8x64

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX
func (x Uint16x8) Average(y Uint16x8) Uint16x8

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX2
func (x Uint16x16) Average(y Uint16x16) Uint16x16

// Average computes the rounded average of corresponding elements.
//
// Asm: VPAVGW, CPU Feature: AVX512BW
func (x Uint16x32) Average(y Uint16x32) Uint16x32

/* AverageMasked */

// AverageMasked computes the rounded average of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPAVGB, CPU Feature: AVX512BW
func (x Uint8x16) AverageMasked(y Uint8x16, mask Mask8x16) Uint8x16

// AverageMasked computes the rounded average of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPAVGB, CPU Feature: AVX512BW
func (x Uint8x32) AverageMasked(y Uint8x32, mask Mask8x32) Uint8x32

// AverageMasked computes the rounded average of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPAVGB, CPU Feature: AVX512BW
func (x Uint8x64) AverageMasked(y Uint8x64, mask Mask8x64) Uint8x64

// AverageMasked computes the rounded average of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPAVGW, CPU Feature: AVX512BW
func (x Uint16x8) AverageMasked(y Uint16x8, mask Mask16x8) Uint16x8

// AverageMasked computes the rounded average of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPAVGW, CPU Feature: AVX512BW
func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16

// AverageMasked computes the rounded average of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPAVGW, CPU Feature: AVX512BW
func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32
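// Average computes the unsigned rounded mean (a + b + 1) >> 1, matching
// VPAVGB/VPAVGW. The scalar equivalent of one uint8 lane, widened so the
// intermediate sum cannot overflow:
func averageLane(a, b uint8) uint8 {
	return uint8((uint16(a) + uint16(b) + 1) >> 1) // round-half-up mean
}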
/* Ceil */

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x4) Ceil() Float32x4

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x8) Ceil() Float32x8

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x2) Ceil() Float64x2

// Ceil rounds elements up to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x4) Ceil() Float64x4

/* CeilWithPrecision */

// CeilWithPrecision rounds elements up with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x4) CeilWithPrecision(prec uint8) Float32x4

// CeilWithPrecision rounds elements up with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x8) CeilWithPrecision(prec uint8) Float32x8

// CeilWithPrecision rounds elements up with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x16) CeilWithPrecision(prec uint8) Float32x16

// CeilWithPrecision rounds elements up with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x2) CeilWithPrecision(prec uint8) Float64x2

// CeilWithPrecision rounds elements up with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x4) CeilWithPrecision(prec uint8) Float64x4

// CeilWithPrecision rounds elements up with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x8) CeilWithPrecision(prec uint8) Float64x8

/* CeilWithPrecisionMasked */

// CeilWithPrecisionMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x4) CeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4

// CeilWithPrecisionMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x8) CeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8

// CeilWithPrecisionMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x16) CeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16

// CeilWithPrecisionMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x2) CeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2

// CeilWithPrecisionMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x4) CeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4

// CeilWithPrecisionMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x8) CeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
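// CeilWithPrecision(prec) rounds each lane up to a multiple of 2^-prec, so
// prec is the number of fraction bits kept (prec = 0 behaves like Ceil, and
// prec = 1 turns 1.2 into 1.5). A scalar sketch of the intended semantics
// for one in-range float64 lane (illustrative only, an assumption based on
// VRNDSCALE's scale field):
func ceilWithPrecisionLane(x float64, prec uint8) float64 {
	scale := float64(uint64(1) << prec) // 2^prec, prec < 64
	s := x * scale
	c := float64(int64(s)) // truncate toward zero (in-range values only)
	if c < s {
		c++ // bump up when truncation moved down
	}
	return c / scale
}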
/* Compress */

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512F
func (x Float32x4) Compress(mask Mask32x4) Float32x4

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512F
func (x Float32x8) Compress(mask Mask32x8) Float32x8

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VCOMPRESSPS, CPU Feature: AVX512F
func (x Float32x16) Compress(mask Mask32x16) Float32x16

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512F
func (x Float64x2) Compress(mask Mask64x2) Float64x2

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512F
func (x Float64x4) Compress(mask Mask64x4) Float64x4

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VCOMPRESSPD, CPU Feature: AVX512F
func (x Float64x8) Compress(mask Mask64x8) Float64x8

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Int8x16) Compress(mask Mask8x16) Int8x16

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Int8x32) Compress(mask Mask8x32) Int8x32

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Int8x64) Compress(mask Mask8x64) Int8x64

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Int16x8) Compress(mask Mask16x8) Int16x8

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Int16x16) Compress(mask Mask16x16) Int16x16

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Int16x32) Compress(mask Mask16x32) Int16x32

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Int32x4) Compress(mask Mask32x4) Int32x4

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Int32x8) Compress(mask Mask32x8) Int32x8

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Int32x16) Compress(mask Mask32x16) Int32x16

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Int64x2) Compress(mask Mask64x2) Int64x2

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Int64x4) Compress(mask Mask64x4) Int64x4

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Int64x8) Compress(mask Mask64x8) Int64x8

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Uint8x16) Compress(mask Mask8x16) Uint8x16

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Uint8x32) Compress(mask Mask8x32) Uint8x32

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
func (x Uint8x64) Compress(mask Mask8x64) Uint8x64

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Uint16x8) Compress(mask Mask16x8) Uint16x8

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Uint16x16) Compress(mask Mask16x16) Uint16x16

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
func (x Uint16x32) Compress(mask Mask16x32) Uint16x32

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Uint32x4) Compress(mask Mask32x4) Uint32x4

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Uint32x8) Compress(mask Mask32x8) Uint32x8

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSD, CPU Feature: AVX512F
func (x Uint32x16) Compress(mask Mask32x16) Uint32x16

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Uint64x2) Compress(mask Mask64x2) Uint64x2

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Uint64x4) Compress(mask Mask64x4) Uint64x4

// Compress performs a compression on vector x using mask by
// selecting the elements indicated by mask and packing them into the lower-indexed elements.
//
// Asm: VPCOMPRESSQ, CPU Feature: AVX512F
func (x Uint64x8) Compress(mask Mask64x8) Uint64x8
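// Compress left-packs the selected lanes toward index 0, e.g. to gather the
// positive values of a vector at the front. A sketch (illustrative only;
// LoadFloat32x4Slice, StoreSlice, and the Greater comparison are assumed
// helpers, and the unselected high lanes of the result are assumed zeroed,
// matching the zeroing form of VCOMPRESSPS):
func examplePositivesFirst(data []float32) {
	v := LoadFloat32x4Slice(data)
	m := v.Greater(Float32x4{})    // lanes holding positive values
	v.Compress(m).StoreSlice(data) // positives packed to the front
}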
/* ConvertToInt32 */

// ConvertToInt32 converts element values to int32.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX
func (x Float32x4) ConvertToInt32() Int32x4

// ConvertToInt32 converts element values to int32.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX
func (x Float32x8) ConvertToInt32() Int32x8

// ConvertToInt32 converts element values to int32.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX512F
func (x Float32x16) ConvertToInt32() Int32x16

/* ConvertToInt32Masked */

// ConvertToInt32Masked converts element values to int32.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX512F
func (x Float32x4) ConvertToInt32Masked(mask Mask32x4) Int32x4

// ConvertToInt32Masked converts element values to int32.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX512F
func (x Float32x8) ConvertToInt32Masked(mask Mask32x8) Int32x8

// ConvertToInt32Masked converts element values to int32.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCVTTPS2DQ, CPU Feature: AVX512F
func (x Float32x16) ConvertToInt32Masked(mask Mask32x16) Int32x16

/* ConvertToUint32 */

// ConvertToUint32 converts element values to uint32.
//
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
func (x Float32x4) ConvertToUint32() Uint32x4

// ConvertToUint32 converts element values to uint32.
//
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
func (x Float32x8) ConvertToUint32() Uint32x8

// ConvertToUint32 converts element values to uint32.
//
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
func (x Float32x16) ConvertToUint32() Uint32x16

/* ConvertToUint32Masked */

// ConvertToUint32Masked converts element values to uint32.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
func (x Float32x4) ConvertToUint32Masked(mask Mask32x4) Uint32x4

// ConvertToUint32Masked converts element values to uint32.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
func (x Float32x8) ConvertToUint32Masked(mask Mask32x8) Uint32x8

// ConvertToUint32Masked converts element values to uint32.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
func (x Float32x16) ConvertToUint32Masked(mask Mask32x16) Uint32x16
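// ConvertToInt32 is the truncating conversion (VCVTTPS2DQ), so for in-range
// values each lane behaves like Go's int32(f): 2.7 becomes 2 and -1.9
// becomes -1. The scalar equivalent of one lane (out-of-range and NaN inputs
// instead produce the instruction's integer-indefinite encoding):
func convertLane(f float32) int32 {
	return int32(f) // truncates toward zero
}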
/* DiffWithCeilWithPrecision */

// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x4) DiffWithCeilWithPrecision(prec uint8) Float32x4

// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x8) DiffWithCeilWithPrecision(prec uint8) Float32x8

// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x16) DiffWithCeilWithPrecision(prec uint8) Float32x16

// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x2) DiffWithCeilWithPrecision(prec uint8) Float64x2

// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x4) DiffWithCeilWithPrecision(prec uint8) Float64x4

// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x8) DiffWithCeilWithPrecision(prec uint8) Float64x8

/* DiffWithCeilWithPrecisionMasked */

// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4

// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8

// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x16) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16

// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x2) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2

// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4

// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8

/* DiffWithFloorWithPrecision */

// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x4) DiffWithFloorWithPrecision(prec uint8) Float32x4

// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x8) DiffWithFloorWithPrecision(prec uint8) Float32x8

// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x16) DiffWithFloorWithPrecision(prec uint8) Float32x16

// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x2) DiffWithFloorWithPrecision(prec uint8) Float64x2

// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x4) DiffWithFloorWithPrecision(prec uint8) Float64x4

// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x8) DiffWithFloorWithPrecision(prec uint8) Float64x8

/* DiffWithFloorWithPrecisionMasked */

// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4

// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8

// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16

// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2

// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4

// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
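// The DiffWith*WithPrecision family returns x minus x rounded at 2^-prec
// granularity (VREDUCE), i.e. the remainder that the corresponding rounding
// discards. A scalar sketch of the floor-based variant for one in-range
// float64 lane (illustrative only):
func diffWithFloorLane(x float64, prec uint8) float64 {
	scale := float64(uint64(1) << prec) // 2^prec
	f := float64(int64(x * scale))      // truncate toward zero
	if f > x*scale {
		f-- // adjust truncation down to floor for negative values
	}
	return x - f/scale // e.g. prec=0: 1.75 -> 0.75, -1.25 -> 0.75
}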
// // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* DiffWithRoundWithPrecision */ // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x4) DiffWithRoundWithPrecision(prec uint8) Float32x4 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x8) DiffWithRoundWithPrecision(prec uint8) Float32x8 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x16) DiffWithRoundWithPrecision(prec uint8) Float32x16 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x2) DiffWithRoundWithPrecision(prec uint8) Float64x2 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x4) DiffWithRoundWithPrecision(prec uint8) Float64x4 // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x8) DiffWithRoundWithPrecision(prec uint8) Float64x8 /* DiffWithRoundWithPrecisionMasked */ // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x16) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x2) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* DiffWithTruncWithPrecision */ // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x4) DiffWithTruncWithPrecision(prec uint8) Float32x4 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x8) DiffWithTruncWithPrecision(prec uint8) Float32x8 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x16) DiffWithTruncWithPrecision(prec uint8) Float32x16 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x2) DiffWithTruncWithPrecision(prec uint8) Float64x2 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic.
// // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x4) DiffWithTruncWithPrecision(prec uint8) Float64x4 // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x8) DiffWithTruncWithPrecision(prec uint8) Float64x8 /* DiffWithTruncWithPrecisionMasked */ // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPS, CPU Feature: AVX512DQ func (x Float32x16) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x2) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VREDUCEPD, CPU Feature: AVX512DQ func (x Float64x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* Div */ // Div divides elements of two vectors. // // Asm: VDIVPS, CPU Feature: AVX func (x Float32x4) Div(y Float32x4) Float32x4 // Div divides elements of two vectors. // // Asm: VDIVPS, CPU Feature: AVX func (x Float32x8) Div(y Float32x8) Float32x8 // Div divides elements of two vectors. // // Asm: VDIVPS, CPU Feature: AVX512F func (x Float32x16) Div(y Float32x16) Float32x16 // Div divides elements of two vectors. // // Asm: VDIVPD, CPU Feature: AVX func (x Float64x2) Div(y Float64x2) Float64x2 // Div divides elements of two vectors. // // Asm: VDIVPD, CPU Feature: AVX func (x Float64x4) Div(y Float64x4) Float64x4 // Div divides elements of two vectors.
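//
// A minimal sketch combining Div with its masked form (safeDiv is a
// hypothetical helper; it assumes the zero value of Float64x8 is the
// all-zeros vector):
//
//	// safeDiv divides x by y only where y > 0, per the write-mask
//	// semantics of DivMasked described below.
//	func safeDiv(x, y Float64x8) Float64x8 {
//		var zero Float64x8
//		return x.DivMasked(y, y.Greater(zero))
//	}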
// // Asm: VDIVPD, CPU Feature: AVX512F func (x Float64x8) Div(y Float64x8) Float64x8 /* DivMasked */ // DivMasked divides elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VDIVPS, CPU Feature: AVX512F func (x Float32x4) DivMasked(y Float32x4, mask Mask32x4) Float32x4 // DivMasked divides elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VDIVPS, CPU Feature: AVX512F func (x Float32x8) DivMasked(y Float32x8, mask Mask32x8) Float32x8 // DivMasked divides elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VDIVPS, CPU Feature: AVX512F func (x Float32x16) DivMasked(y Float32x16, mask Mask32x16) Float32x16 // DivMasked divides elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VDIVPD, CPU Feature: AVX512F func (x Float64x2) DivMasked(y Float64x2, mask Mask64x2) Float64x2 // DivMasked divides elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VDIVPD, CPU Feature: AVX512F func (x Float64x4) DivMasked(y Float64x4, mask Mask64x4) Float64x4 // DivMasked divides elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VDIVPD, CPU Feature: AVX512F func (x Float64x8) DivMasked(y Float64x8, mask Mask64x8) Float64x8 /* DotProdBroadcast */ // DotProdBroadcast multiplies corresponding elements of two vectors and broadcasts the sum of the products to all elements. // // Asm: VDPPS, CPU Feature: AVX func (x Float32x4) DotProdBroadcast(y Float32x4) Float32x4 // DotProdBroadcast multiplies corresponding elements of two vectors and broadcasts the sum of the products to all elements. // // Asm: VDPPS, CPU Feature: AVX func (x Float32x8) DotProdBroadcast(y Float32x8) Float32x8 // DotProdBroadcast multiplies corresponding elements of two vectors and broadcasts the sum of the products to all elements. // // Asm: VDPPD, CPU Feature: AVX func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2 /* Equal */ // Equal compares for equality. // // Asm: VPCMPEQB, CPU Feature: AVX func (x Int8x16) Equal(y Int8x16) Mask8x16 // Equal compares for equality. // // Asm: VPCMPEQB, CPU Feature: AVX2 func (x Int8x32) Equal(y Int8x32) Mask8x32 // Equal compares for equality. // // Asm: VPCMPEQB, CPU Feature: AVX512BW func (x Int8x64) Equal(y Int8x64) Mask8x64 // Equal compares for equality. // // Asm: VPCMPEQW, CPU Feature: AVX func (x Int16x8) Equal(y Int16x8) Mask16x8 // Equal compares for equality. // // Asm: VPCMPEQW, CPU Feature: AVX2 func (x Int16x16) Equal(y Int16x16) Mask16x16 // Equal compares for equality. // // Asm: VPCMPEQW, CPU Feature: AVX512BW func (x Int16x32) Equal(y Int16x32) Mask16x32 // Equal compares for equality. // // Asm: VPCMPEQD, CPU Feature: AVX func (x Int32x4) Equal(y Int32x4) Mask32x4 // Equal compares for equality. // // Asm: VPCMPEQD, CPU Feature: AVX2 func (x Int32x8) Equal(y Int32x8) Mask32x8 // Equal compares for equality. // // Asm: VPCMPEQD, CPU Feature: AVX512F func (x Int32x16) Equal(y Int32x16) Mask32x16 // Equal compares for equality. // // Asm: VPCMPEQQ, CPU Feature: AVX func (x Int64x2) Equal(y Int64x2) Mask64x2 // Equal compares for equality. // // Asm: VPCMPEQQ, CPU Feature: AVX2 func (x Int64x4) Equal(y Int64x4) Mask64x4 // Equal compares for equality. // // Asm: VPCMPEQQ, CPU Feature: AVX512F func (x Int64x8) Equal(y Int64x8) Mask64x8 // Equal compares for equality. // // Asm: VPCMPEQB, CPU Feature: AVX func (x Uint8x16) Equal(y Uint8x16) Mask8x16 // Equal compares for equality.
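//
// Equality masks feed the *Masked variants of other operations. A minimal
// sketch (byteMatches is a hypothetical helper):
//
//	// byteMatches reports, per byte, whether x and y agree; the
//	// resulting mask can gate any operation taking a Mask8x32.
//	func byteMatches(x, y Uint8x32) Mask8x32 {
//		return x.Equal(y)
//	}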
// // Asm: VPCMPEQB, CPU Feature: AVX2 func (x Uint8x32) Equal(y Uint8x32) Mask8x32 // Equal compares for equality. // // Asm: VPCMPEQB, CPU Feature: AVX512BW func (x Uint8x64) Equal(y Uint8x64) Mask8x64 // Equal compares for equality. // // Asm: VPCMPEQW, CPU Feature: AVX func (x Uint16x8) Equal(y Uint16x8) Mask16x8 // Equal compares for equality. // // Asm: VPCMPEQW, CPU Feature: AVX2 func (x Uint16x16) Equal(y Uint16x16) Mask16x16 // Equal compares for equality. // // Asm: VPCMPEQW, CPU Feature: AVX512BW func (x Uint16x32) Equal(y Uint16x32) Mask16x32 // Equal compares for equality. // // Asm: VPCMPEQD, CPU Feature: AVX func (x Uint32x4) Equal(y Uint32x4) Mask32x4 // Equal compares for equality. // // Asm: VPCMPEQD, CPU Feature: AVX2 func (x Uint32x8) Equal(y Uint32x8) Mask32x8 // Equal compares for equality. // // Asm: VPCMPEQD, CPU Feature: AVX512F func (x Uint32x16) Equal(y Uint32x16) Mask32x16 // Equal compares for equality. // // Asm: VPCMPEQQ, CPU Feature: AVX func (x Uint64x2) Equal(y Uint64x2) Mask64x2 // Equal compares for equality. // // Asm: VPCMPEQQ, CPU Feature: AVX2 func (x Uint64x4) Equal(y Uint64x4) Mask64x4 // Equal compares for equality. // // Asm: VPCMPEQQ, CPU Feature: AVX512F func (x Uint64x8) Equal(y Uint64x8) Mask64x8 // Equal compares for equality. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x4) Equal(y Float32x4) Mask32x4 // Equal compares for equality. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) Equal(y Float32x8) Mask32x8 // Equal compares for equality. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) Equal(y Float32x16) Mask32x16 // Equal compares for equality. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x2) Equal(y Float64x2) Mask64x2 // Equal compares for equality. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) Equal(y Float64x4) Mask64x4 // Equal compares for equality. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) Equal(y Float64x8) Mask64x8 /* EqualMasked */ // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x4) EqualMasked(y Float32x4, mask Mask32x4) Mask32x4 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x8) EqualMasked(y Float32x8, mask Mask32x8) Mask32x8 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) EqualMasked(y Float32x16, mask Mask32x16) Mask32x16 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x2) EqualMasked(y Float64x2, mask Mask64x2) Mask64x2 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x4) EqualMasked(y Float64x4, mask Mask64x4) Mask64x4 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) EqualMasked(y Float64x8, mask Mask64x8) Mask64x8 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) EqualMasked(y Int8x16, mask Mask8x16) Mask8x16 // EqualMasked compares for equality. 
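//
// Under the write-mask semantics of VPCMPB, elements excluded by the mask
// are reported as unset, so a masked compare doubles as a logical AND of
// masks. An illustrative sketch (equalWhere is a hypothetical helper):
//
//	// equalWhere computes (x == y) AND m in a single compare.
//	func equalWhere(x, y Int8x32, m Mask8x32) Mask8x32 {
//		return x.EqualMasked(y, m)
//	}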
// // This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) EqualMasked(y Int8x32, mask Mask8x32) Mask8x32 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) EqualMasked(y Int8x64, mask Mask8x64) Mask8x64 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) EqualMasked(y Int16x8, mask Mask16x8) Mask16x8 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) EqualMasked(y Int16x16, mask Mask16x16) Mask16x16 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) EqualMasked(y Int16x32, mask Mask16x32) Mask16x32 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) EqualMasked(y Int32x4, mask Mask32x4) Mask32x4 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) EqualMasked(y Int32x8, mask Mask32x8) Mask32x8 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) EqualMasked(y Int32x16, mask Mask32x16) Mask32x16 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) EqualMasked(y Int64x2, mask Mask64x2) Mask64x2 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) EqualMasked(y Int64x4, mask Mask64x4) Mask64x4 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) EqualMasked(y Int64x8, mask Mask64x8) Mask64x8 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) EqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) EqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) EqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) EqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) EqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) EqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 // EqualMasked compares for equality. 
// // This operation is applied selectively under a write mask. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) EqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) EqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) EqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) EqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 // EqualMasked compares for equality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 /* Floor */ // Floor rounds elements down to the nearest integer. // // Asm: VROUNDPS, CPU Feature: AVX func (x Float32x4) Floor() Float32x4 // Floor rounds elements down to the nearest integer. // // Asm: VROUNDPS, CPU Feature: AVX func (x Float32x8) Floor() Float32x8 // Floor rounds elements down to the nearest integer. // // Asm: VROUNDPD, CPU Feature: AVX func (x Float64x2) Floor() Float64x2 // Floor rounds elements down to the nearest integer. // // Asm: VROUNDPD, CPU Feature: AVX func (x Float64x4) Floor() Float64x4 /* FloorWithPrecision */ // FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x4) FloorWithPrecision(prec uint8) Float32x4 // FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x8) FloorWithPrecision(prec uint8) Float32x8 // FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x16) FloorWithPrecision(prec uint8) Float32x16 // FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x2) FloorWithPrecision(prec uint8) Float64x2 // FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x4) FloorWithPrecision(prec uint8) Float64x4 // FloorWithPrecision rounds elements down with specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x8) FloorWithPrecision(prec uint8) Float64x8 /* FloorWithPrecisionMasked */ // FloorWithPrecisionMasked rounds elements down with specified precision. // // This operation is applied selectively under a write mask.
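//
// Per the underlying VRNDSCALEPS/VRNDSCALEPD semantics, prec selects the
// number of fraction bits kept: the result is floor(x * 2^prec) / 2^prec.
// An illustrative sketch (floorToQuarters is a hypothetical helper):
//
//	// floorToQuarters floors each element to a multiple of 0.25
//	// (prec = 2 keeps two fraction bits), e.g. 1.80 -> 1.75.
//	func floorToQuarters(x Float32x4) Float32x4 {
//		return x.FloorWithPrecision(2)
//	}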
// // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x4) FloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // FloorWithPrecisionMasked rounds elements down with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x8) FloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // FloorWithPrecisionMasked rounds elements down with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x16) FloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // FloorWithPrecisionMasked rounds elements down with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x2) FloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // FloorWithPrecisionMasked rounds elements down with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x4) FloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // FloorWithPrecisionMasked rounds elements down with specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x8) FloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* FusedMultiplyAdd */ // FusedMultiplyAdd performs (x * y) + z. // // Asm: VFMADD213PS, CPU Feature: AVX512F func (x Float32x4) FusedMultiplyAdd(y Float32x4, z Float32x4) Float32x4 // FusedMultiplyAdd performs (x * y) + z. // // Asm: VFMADD213PS, CPU Feature: AVX512F func (x Float32x8) FusedMultiplyAdd(y Float32x8, z Float32x8) Float32x8 // FusedMultiplyAdd performs (x * y) + z. // // Asm: VFMADD213PS, CPU Feature: AVX512F func (x Float32x16) FusedMultiplyAdd(y Float32x16, z Float32x16) Float32x16 // FusedMultiplyAdd performs (x * y) + z. // // Asm: VFMADD213PD, CPU Feature: AVX512F func (x Float64x2) FusedMultiplyAdd(y Float64x2, z Float64x2) Float64x2 // FusedMultiplyAdd performs (x * y) + z. // // Asm: VFMADD213PD, CPU Feature: AVX512F func (x Float64x4) FusedMultiplyAdd(y Float64x4, z Float64x4) Float64x4 // FusedMultiplyAdd performs (x * y) + z. // // Asm: VFMADD213PD, CPU Feature: AVX512F func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8 /* FusedMultiplyAddMasked */ // FusedMultiplyAddMasked performs (x * y) + z. // // This operation is applied selectively under a write mask. // // Asm: VFMADD213PS, CPU Feature: AVX512F func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 // FusedMultiplyAddMasked performs (x * y) + z. // // This operation is applied selectively under a write mask. // // Asm: VFMADD213PS, CPU Feature: AVX512F func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 // FusedMultiplyAddMasked performs (x * y) + z.
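//
// Each fused step incurs a single rounding, which makes FusedMultiplyAdd a
// natural fit for Horner-style polynomial evaluation. A sketch (horner2 is
// a hypothetical helper):
//
//	// horner2 evaluates c2*x*x + c1*x + c0 per element.
//	func horner2(x, c2, c1, c0 Float32x8) Float32x8 {
//		acc := c2.FusedMultiplyAdd(x, c1)  // c2*x + c1
//		return acc.FusedMultiplyAdd(x, c0) // (c2*x + c1)*x + c0
//	}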
// // This operation is applied selectively under a write mask. // // Asm: VFMADD213PS, CPU Feature: AVX512F func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 // FusedMultiplyAddMasked performs (x * y) + z. // // This operation is applied selectively under a write mask. // // Asm: VFMADD213PD, CPU Feature: AVX512F func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 // FusedMultiplyAddMasked performs (x * y) + z. // // This operation is applied selectively under a write mask. // // Asm: VFMADD213PD, CPU Feature: AVX512F func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 // FusedMultiplyAddMasked performs (x * y) + z. // // This operation is applied selectively under a write mask. // // Asm: VFMADD213PD, CPU Feature: AVX512F func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 /* FusedMultiplyAddSub */ // FusedMultiplyAddSub performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // Asm: VFMADDSUB213PS, CPU Feature: AVX512F func (x Float32x4) FusedMultiplyAddSub(y Float32x4, z Float32x4) Float32x4 // FusedMultiplyAddSub performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // Asm: VFMADDSUB213PS, CPU Feature: AVX512F func (x Float32x8) FusedMultiplyAddSub(y Float32x8, z Float32x8) Float32x8 // FusedMultiplyAddSub performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // Asm: VFMADDSUB213PS, CPU Feature: AVX512F func (x Float32x16) FusedMultiplyAddSub(y Float32x16, z Float32x16) Float32x16 // FusedMultiplyAddSub performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // Asm: VFMADDSUB213PD, CPU Feature: AVX512F func (x Float64x2) FusedMultiplyAddSub(y Float64x2, z Float64x2) Float64x2 // FusedMultiplyAddSub performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // Asm: VFMADDSUB213PD, CPU Feature: AVX512F func (x Float64x4) FusedMultiplyAddSub(y Float64x4, z Float64x4) Float64x4 // FusedMultiplyAddSub performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // Asm: VFMADDSUB213PD, CPU Feature: AVX512F func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8 /* FusedMultiplyAddSubMasked */ // FusedMultiplyAddSubMasked performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // This operation is applied selectively under a write mask. // // Asm: VFMADDSUB213PS, CPU Feature: AVX512F func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 // FusedMultiplyAddSubMasked performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // This operation is applied selectively under a write mask. // // Asm: VFMADDSUB213PS, CPU Feature: AVX512F func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 // FusedMultiplyAddSubMasked performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // This operation is applied selectively under a write mask. // // Asm: VFMADDSUB213PS, CPU Feature: AVX512F func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 // FusedMultiplyAddSubMasked performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements.
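//
// A worked example of the alternating pattern on Float64x2 (mulAddSub is a
// hypothetical helper; indices are counted from 0):
//
//	// mulAddSub: element 0 (even) subtracts z, element 1 (odd) adds z.
//	// For x=(1, 10), y=(2, 20), z=(5, 5): result = (1*2-5, 10*20+5) = (-3, 205).
//	func mulAddSub(x, y, z Float64x2) Float64x2 {
//		return x.FusedMultiplyAddSub(y, z)
//	}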
// // This operation is applied selectively under a write mask. // // Asm: VFMADDSUB213PD, CPU Feature: AVX512F func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 // FusedMultiplyAddSubMasked performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // This operation is applied selectively under a write mask. // // Asm: VFMADDSUB213PD, CPU Feature: AVX512F func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 // FusedMultiplyAddSubMasked performs (x * y) - z for even-indexed elements, and (x * y) + z for odd-indexed elements. // // This operation is applied selectively under a write mask. // // Asm: VFMADDSUB213PD, CPU Feature: AVX512F func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 /* FusedMultiplySubAdd */ // FusedMultiplySubAdd performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // Asm: VFMSUBADD213PS, CPU Feature: AVX512F func (x Float32x4) FusedMultiplySubAdd(y Float32x4, z Float32x4) Float32x4 // FusedMultiplySubAdd performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // Asm: VFMSUBADD213PS, CPU Feature: AVX512F func (x Float32x8) FusedMultiplySubAdd(y Float32x8, z Float32x8) Float32x8 // FusedMultiplySubAdd performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // Asm: VFMSUBADD213PS, CPU Feature: AVX512F func (x Float32x16) FusedMultiplySubAdd(y Float32x16, z Float32x16) Float32x16 // FusedMultiplySubAdd performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // Asm: VFMSUBADD213PD, CPU Feature: AVX512F func (x Float64x2) FusedMultiplySubAdd(y Float64x2, z Float64x2) Float64x2 // FusedMultiplySubAdd performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // Asm: VFMSUBADD213PD, CPU Feature: AVX512F func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4 // FusedMultiplySubAdd performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // Asm: VFMSUBADD213PD, CPU Feature: AVX512F func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8 /* FusedMultiplySubAddMasked */ // FusedMultiplySubAddMasked performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // This operation is applied selectively under a write mask. // // Asm: VFMSUBADD213PS, CPU Feature: AVX512F func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 // FusedMultiplySubAddMasked performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // This operation is applied selectively under a write mask. // // Asm: VFMSUBADD213PS, CPU Feature: AVX512F func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 // FusedMultiplySubAddMasked performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // This operation is applied selectively under a write mask. // // Asm: VFMSUBADD213PS, CPU Feature: AVX512F func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 // FusedMultiplySubAddMasked performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // This operation is applied selectively under a write mask.
// // Asm: VFMSUBADD213PD, CPU Feature: AVX512F func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 // FusedMultiplySubAddMasked performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // This operation is applied selectively under a write mask. // // Asm: VFMSUBADD213PD, CPU Feature: AVX512F func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 // FusedMultiplySubAddMasked performs (x * y) + z for even-indexed elements, and (x * y) - z for odd-indexed elements. // // This operation is applied selectively under a write mask. // // Asm: VFMSUBADD213PD, CPU Feature: AVX512F func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 /* GaloisFieldAffineTransform */ // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI func (x Uint8x16) GaloisFieldAffineTransform(y Uint64x2, b uint8) Uint8x16 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI func (x Uint8x32) GaloisFieldAffineTransform(y Uint64x4, b uint8) Uint8x32 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64 /* GaloisFieldAffineTransformInverse */ // GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI func (x Uint8x16) GaloisFieldAffineTransformInverse(y Uint64x2, b uint8) Uint8x16 // GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x.
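//
// The invert-then-transform shape is that of the AES S-box. A sketch
// (aesSubBytes is a hypothetical helper; m is assumed to hold the AES
// affine matrix, commonly encoded as 0xF1E3C78F1F3E7CF8, in every 64-bit
// element, and 0x63 is the AES affine constant):
//
//	// aesSubBytes maps each byte of x through the AES S-box.
//	func aesSubBytes(x Uint8x32, m Uint64x4) Uint8x32 {
//		return x.GaloisFieldAffineTransformInverse(m, 0x63)
//	}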
// // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x32 // GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x64 /* GaloisFieldAffineTransformInverseMasked */ // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // This operation is applied selectively under a write mask. // // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16 // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // This operation is applied selectively under a write mask. // // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32 // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // This operation is applied selectively under a write mask. // // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64 /* GaloisFieldAffineTransformMasked */ // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // This operation is applied selectively under a write mask.
// // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16 // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // This operation is applied selectively under a write mask. // // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32 // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. // // This operation is applied selectively under a write mask. // // b is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldAffineTransformMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64 /* GaloisFieldMul */ // GaloisFieldMul computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. // // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x16) GaloisFieldMul(y Uint8x16) Uint8x16 // GaloisFieldMul computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. // // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32 // GaloisFieldMul computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. // // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64 /* GaloisFieldMulMasked */ // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. // // This operation is applied selectively under a write mask. // // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, mask Mask8x16) Uint8x16 // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. // // This operation is applied selectively under a write mask. // // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32 // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. // // This operation is applied selectively under a write mask. // // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64 /* Get128 */ // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VEXTRACTF128, CPU Feature: AVX func (x Float32x8) Get128(index uint8) Float32x4 // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
// // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VEXTRACTF128, CPU Feature: AVX func (x Float64x4) Get128(index uint8) Float64x2 // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VEXTRACTI128, CPU Feature: AVX2 func (x Int8x32) Get128(index uint8) Int8x16 // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VEXTRACTI128, CPU Feature: AVX2 func (x Int16x16) Get128(index uint8) Int16x8 // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VEXTRACTI128, CPU Feature: AVX2 func (x Int32x8) Get128(index uint8) Int32x4 // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VEXTRACTI128, CPU Feature: AVX2 func (x Int64x4) Get128(index uint8) Int64x2 // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VEXTRACTI128, CPU Feature: AVX2 func (x Uint8x32) Get128(index uint8) Uint8x16 // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VEXTRACTI128, CPU Feature: AVX2 func (x Uint16x16) Get128(index uint8) Uint16x8 // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VEXTRACTI128, CPU Feature: AVX2 func (x Uint32x8) Get128(index uint8) Uint32x4 // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VEXTRACTI128, CPU Feature: AVX2 func (x Uint64x4) Get128(index uint8) Uint64x2 /* GetElem */ // GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VPEXTRB, CPU Feature: AVX512BW func (x Int8x16) GetElem(index uint8) int8 // GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VPEXTRW, CPU Feature: AVX512BW func (x Int16x8) GetElem(index uint8) int16 // GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VPEXTRD, CPU Feature: AVX func (x Int32x4) GetElem(index uint8) int32 // GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VPEXTRQ, CPU Feature: AVX func (x Int64x2) GetElem(index uint8) int64 // GetElem retrieves a single constant-indexed element's value.
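//
// Get128 and GetElem combine into scalar reductions. A sketch (sumInt64x4
// is a hypothetical helper):
//
//	// sumInt64x4 returns the sum of all four elements.
//	func sumInt64x4(v Int64x4) int64 {
//		h := v.Get128(0).Add(v.Get128(1)) // fold the halves into 2 lanes
//		return h.GetElem(0) + h.GetElem(1)
//	}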
// // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VPEXTRB, CPU Feature: AVX512BW func (x Uint8x16) GetElem(index uint8) uint8 // GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VPEXTRW, CPU Feature: AVX512BW func (x Uint16x8) GetElem(index uint8) uint16 // GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VPEXTRD, CPU Feature: AVX func (x Uint32x4) GetElem(index uint8) uint32 // GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VPEXTRQ, CPU Feature: AVX func (x Uint64x2) GetElem(index uint8) uint64 /* Greater */ // Greater compares for greater than. // // Asm: VPCMPGTB, CPU Feature: AVX func (x Int8x16) Greater(y Int8x16) Mask8x16 // Greater compares for greater than. // // Asm: VPCMPGTB, CPU Feature: AVX2 func (x Int8x32) Greater(y Int8x32) Mask8x32 // Greater compares for greater than. // // Asm: VPCMPGTB, CPU Feature: AVX512BW func (x Int8x64) Greater(y Int8x64) Mask8x64 // Greater compares for greater than. // // Asm: VPCMPGTW, CPU Feature: AVX func (x Int16x8) Greater(y Int16x8) Mask16x8 // Greater compares for greater than. // // Asm: VPCMPGTW, CPU Feature: AVX2 func (x Int16x16) Greater(y Int16x16) Mask16x16 // Greater compares for greater than. // // Asm: VPCMPGTW, CPU Feature: AVX512BW func (x Int16x32) Greater(y Int16x32) Mask16x32 // Greater compares for greater than. // // Asm: VPCMPGTD, CPU Feature: AVX func (x Int32x4) Greater(y Int32x4) Mask32x4 // Greater compares for greater than. // // Asm: VPCMPGTD, CPU Feature: AVX2 func (x Int32x8) Greater(y Int32x8) Mask32x8 // Greater compares for greater than. // // Asm: VPCMPGTD, CPU Feature: AVX512F func (x Int32x16) Greater(y Int32x16) Mask32x16 // Greater compares for greater than. // // Asm: VPCMPGTQ, CPU Feature: AVX func (x Int64x2) Greater(y Int64x2) Mask64x2 // Greater compares for greater than. // // Asm: VPCMPGTQ, CPU Feature: AVX2 func (x Int64x4) Greater(y Int64x4) Mask64x4 // Greater compares for greater than. // // Asm: VPCMPGTQ, CPU Feature: AVX512F func (x Int64x8) Greater(y Int64x8) Mask64x8 // Greater compares for greater than. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x4) Greater(y Float32x4) Mask32x4 // Greater compares for greater than. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) Greater(y Float32x8) Mask32x8 // Greater compares for greater than. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) Greater(y Float32x16) Mask32x16 // Greater compares for greater than. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x2) Greater(y Float64x2) Mask64x2 // Greater compares for greater than. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) Greater(y Float64x4) Mask64x4 // Greater compares for greater than. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) Greater(y Float64x8) Mask64x8 // Greater compares for greater than. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) Greater(y Uint8x16) Mask8x16 // Greater compares for greater than. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) Greater(y Uint8x32) Mask8x32 // Greater compares for greater than.
// // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) Greater(y Uint8x64) Mask8x64 // Greater compares for greater than. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) Greater(y Uint16x8) Mask16x8 // Greater compares for greater than. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) Greater(y Uint16x16) Mask16x16 // Greater compares for greater than. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) Greater(y Uint16x32) Mask16x32 // Greater compares for greater than. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) Greater(y Uint32x4) Mask32x4 // Greater compares for greater than. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) Greater(y Uint32x8) Mask32x8 // Greater compares for greater than. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) Greater(y Uint32x16) Mask32x16 // Greater compares for greater than. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) Greater(y Uint64x2) Mask64x2 // Greater compares for greater than. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) Greater(y Uint64x4) Mask64x4 // Greater compares for greater than. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) Greater(y Uint64x8) Mask64x8 /* GreaterEqual */ // GreaterEqual compares for greater than or equal. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4 // GreaterEqual compares for greater than or equal. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8 // GreaterEqual compares for greater than or equal. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16 // GreaterEqual compares for greater than or equal. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2 // GreaterEqual compares for greater than or equal. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4 // GreaterEqual compares for greater than or equal. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16 // GreaterEqual compares for greater than or equal. 
// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 // GreaterEqual compares for greater than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8 /* GreaterEqualMasked */ // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x4) GreaterEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x8) GreaterEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) GreaterEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x2) GreaterEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x4) GreaterEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. 
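//
// Chaining a plain compare into a masked compare ANDs the two conditions,
// since elements excluded by the write mask are reported as unset. A sketch
// (inRange is a hypothetical helper, shown here with Int32x4):
//
//	// inRange computes lo <= x && x <= hi per element.
//	func inRange(x, lo, hi Int32x4) Mask32x4 {
//		ge := x.GreaterEqual(lo)            // x >= lo
//		return hi.GreaterEqualMasked(x, ge) // hi >= x, only where x >= lo
//	}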
// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) GreaterEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) GreaterEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) GreaterEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) GreaterEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) GreaterEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) GreaterEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) GreaterEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) GreaterEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) GreaterEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) GreaterEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) GreaterEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) GreaterEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) GreaterEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) GreaterEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) GreaterEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 // GreaterEqualMasked compares for greater than or equal. // // This operation is applied selectively under a write mask. 
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x64) GreaterEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64

// GreaterEqualMasked compares for greater than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x8) GreaterEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8

// GreaterEqualMasked compares for greater than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x16) GreaterEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16

// GreaterEqualMasked compares for greater than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x32) GreaterEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32

// GreaterEqualMasked compares for greater than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x4) GreaterEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4

// GreaterEqualMasked compares for greater than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x8) GreaterEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8

// GreaterEqualMasked compares for greater than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x16) GreaterEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16

// GreaterEqualMasked compares for greater than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x2) GreaterEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2

// GreaterEqualMasked compares for greater than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x4) GreaterEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4

// GreaterEqualMasked compares for greater than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x8) GreaterEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
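
// The following is an illustrative sketch, not a generated declaration:
// composing an unmasked compare with a masked compare yields a lane-wise
// range test, since the masked compare is evaluated only in lanes selected
// by the first result (see the write-mask note on GreaterEqualMasked above).
func exampleInRangeInt64x2(x, lo, hi Int64x2) Mask64x2 {
	ge := x.GreaterEqual(lo)            // lanes where x >= lo
	return hi.GreaterEqualMasked(x, ge) // of those, lanes where x <= hi
}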

/* GreaterMasked */

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x4) GreaterMasked(y Float32x4, mask Mask32x4) Mask32x4

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x8) GreaterMasked(y Float32x8, mask Mask32x8) Mask32x8

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x16) GreaterMasked(y Float32x16, mask Mask32x16) Mask32x16

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x2) GreaterMasked(y Float64x2, mask Mask64x2) Mask64x2

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x4) GreaterMasked(y Float64x4, mask Mask64x4) Mask64x4

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x8) GreaterMasked(y Float64x8, mask Mask64x8) Mask64x8

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x16) GreaterMasked(y Int8x16, mask Mask8x16) Mask8x16

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x32) GreaterMasked(y Int8x32, mask Mask8x32) Mask8x32

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x64) GreaterMasked(y Int8x64, mask Mask8x64) Mask8x64

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x8) GreaterMasked(y Int16x8, mask Mask16x8) Mask16x8

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x16) GreaterMasked(y Int16x16, mask Mask16x16) Mask16x16

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x32) GreaterMasked(y Int16x32, mask Mask16x32) Mask16x32

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x4) GreaterMasked(y Int32x4, mask Mask32x4) Mask32x4

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x8) GreaterMasked(y Int32x8, mask Mask32x8) Mask32x8

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x16) GreaterMasked(y Int32x16, mask Mask32x16) Mask32x16

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x2) GreaterMasked(y Int64x2, mask Mask64x2) Mask64x2

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x4) GreaterMasked(y Int64x4, mask Mask64x4) Mask64x4

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x8) GreaterMasked(y Int64x8, mask Mask64x8) Mask64x8

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x16) GreaterMasked(y Uint8x16, mask Mask8x16) Mask8x16

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x32) GreaterMasked(y Uint8x32, mask Mask8x32) Mask8x32

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x64) GreaterMasked(y Uint8x64, mask Mask8x64) Mask8x64

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x8) GreaterMasked(y Uint16x8, mask Mask16x8) Mask16x8

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x16) GreaterMasked(y Uint16x16, mask Mask16x16) Mask16x16

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x32) GreaterMasked(y Uint16x32, mask Mask16x32) Mask16x32

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x4) GreaterMasked(y Uint32x4, mask Mask32x4) Mask32x4

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x8) GreaterMasked(y Uint32x8, mask Mask32x8) Mask32x8

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x16) GreaterMasked(y Uint32x16, mask Mask32x16) Mask32x16

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x2) GreaterMasked(y Uint64x2, mask Mask64x2) Mask64x2

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x4) GreaterMasked(y Uint64x4, mask Mask64x4) Mask64x4

// GreaterMasked compares for greater than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x8) GreaterMasked(y Uint64x8, mask Mask64x8) Mask64x8
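
// An illustrative sketch, not a generated declaration: GreaterMasked narrows
// an existing selection, evaluating the strict compare only in lanes of sel.
// Note from the Asm lines above that signed and unsigned element types lower
// to different instructions (for example VPCMPQ vs. VPCMPUQ), so the same bit
// patterns can order differently in Int64x2 and Uint64x2.
func exampleNarrowSelection(x, y Int64x2, sel Mask64x2) Mask64x2 {
	return x.GreaterMasked(y, sel) // lanes of sel where x > y
}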

/* IsNan */

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) IsNan(y Float32x4) Mask32x4

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) IsNan(y Float32x8) Mask32x8

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x16) IsNan(y Float32x16) Mask32x16

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) IsNan(y Float64x2) Mask64x2

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) IsNan(y Float64x4) Mask64x4

// IsNan checks if elements are NaN. Use as x.IsNan(x).
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x8) IsNan(y Float64x8) Mask64x8

/* IsNanMasked */

// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x4) IsNanMasked(y Float32x4, mask Mask32x4) Mask32x4

// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x8) IsNanMasked(y Float32x8, mask Mask32x8) Mask32x8

// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x16) IsNanMasked(y Float32x16, mask Mask32x16) Mask32x16

// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x2) IsNanMasked(y Float64x2, mask Mask64x2) Mask64x2

// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x4) IsNanMasked(y Float64x4, mask Mask64x4) Mask64x4

// IsNanMasked checks if elements are NaN. Use as x.IsNanMasked(x, mask).
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x8) IsNanMasked(y Float64x8, mask Mask64x8) Mask64x8
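
// An illustrative sketch, not a generated declaration: following the doc
// comment's suggested usage, passing the receiver as its own argument flags
// the NaN lanes, since NaN is the only value unordered with itself.
func exampleNaNLanes(x Float64x4) Mask64x4 {
	return x.IsNan(x) // true exactly in the lanes of x holding NaN
}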

/* Less */

// Less compares for less than.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) Less(y Float32x4) Mask32x4

// Less compares for less than.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) Less(y Float32x8) Mask32x8

// Less compares for less than.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x16) Less(y Float32x16) Mask32x16

// Less compares for less than.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) Less(y Float64x2) Mask64x2

// Less compares for less than.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) Less(y Float64x4) Mask64x4

// Less compares for less than.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x8) Less(y Float64x8) Mask64x8

// Less compares for less than.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x16) Less(y Int8x16) Mask8x16

// Less compares for less than.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x32) Less(y Int8x32) Mask8x32

// Less compares for less than.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x64) Less(y Int8x64) Mask8x64

// Less compares for less than.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x8) Less(y Int16x8) Mask16x8

// Less compares for less than.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x16) Less(y Int16x16) Mask16x16

// Less compares for less than.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x32) Less(y Int16x32) Mask16x32

// Less compares for less than.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x4) Less(y Int32x4) Mask32x4

// Less compares for less than.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x8) Less(y Int32x8) Mask32x8

// Less compares for less than.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x16) Less(y Int32x16) Mask32x16

// Less compares for less than.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x2) Less(y Int64x2) Mask64x2

// Less compares for less than.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x4) Less(y Int64x4) Mask64x4

// Less compares for less than.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x8) Less(y Int64x8) Mask64x8

// Less compares for less than.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x16) Less(y Uint8x16) Mask8x16

// Less compares for less than.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x32) Less(y Uint8x32) Mask8x32

// Less compares for less than.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x64) Less(y Uint8x64) Mask8x64

// Less compares for less than.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x8) Less(y Uint16x8) Mask16x8

// Less compares for less than.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x16) Less(y Uint16x16) Mask16x16

// Less compares for less than.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x32) Less(y Uint16x32) Mask16x32

// Less compares for less than.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x4) Less(y Uint32x4) Mask32x4

// Less compares for less than.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x8) Less(y Uint32x8) Mask32x8

// Less compares for less than.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x16) Less(y Uint32x16) Mask32x16

// Less compares for less than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x2) Less(y Uint64x2) Mask64x2

// Less compares for less than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x4) Less(y Uint64x4) Mask64x4

// Less compares for less than.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x8) Less(y Uint64x8) Mask64x8
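
// An illustrative sketch, not a generated declaration: Less produces a mask
// rather than a vector, so a scalar-style "if a < b" becomes one mask bit per
// lane that later masked operations in this file can consume.
func exampleLessThan(a, b Float32x8) Mask32x8 {
	return a.Less(b) // one mask bit per float32 lane
}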

/* LessEqual */

// LessEqual compares for less than or equal.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) LessEqual(y Float32x4) Mask32x4

// LessEqual compares for less than or equal.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) LessEqual(y Float32x8) Mask32x8

// LessEqual compares for less than or equal.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x16) LessEqual(y Float32x16) Mask32x16

// LessEqual compares for less than or equal.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) LessEqual(y Float64x2) Mask64x2

// LessEqual compares for less than or equal.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) LessEqual(y Float64x4) Mask64x4

// LessEqual compares for less than or equal.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x8) LessEqual(y Float64x8) Mask64x8

// LessEqual compares for less than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x16) LessEqual(y Int8x16) Mask8x16

// LessEqual compares for less than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x32) LessEqual(y Int8x32) Mask8x32

// LessEqual compares for less than or equal.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x64) LessEqual(y Int8x64) Mask8x64

// LessEqual compares for less than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x8) LessEqual(y Int16x8) Mask16x8

// LessEqual compares for less than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x16) LessEqual(y Int16x16) Mask16x16

// LessEqual compares for less than or equal.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x32) LessEqual(y Int16x32) Mask16x32

// LessEqual compares for less than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x4) LessEqual(y Int32x4) Mask32x4

// LessEqual compares for less than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x8) LessEqual(y Int32x8) Mask32x8

// LessEqual compares for less than or equal.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x16) LessEqual(y Int32x16) Mask32x16

// LessEqual compares for less than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x2) LessEqual(y Int64x2) Mask64x2

// LessEqual compares for less than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x4) LessEqual(y Int64x4) Mask64x4

// LessEqual compares for less than or equal.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x8) LessEqual(y Int64x8) Mask64x8

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4

// LessEqual compares for less than or equal.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
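
// An illustrative sketch, not a generated declaration: a lane-wise
// "within limit" test. For unsigned element types LessEqual selects the
// unsigned compare forms (VPCMPU*), so values are ordered by their unsigned
// magnitudes rather than as two's-complement integers.
func exampleWithinLimit(used, limit Uint32x8) Mask32x8 {
	return used.LessEqual(limit) // lanes where used <= limit
}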

/* LessEqualMasked */

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x4) LessEqualMasked(y Float32x4, mask Mask32x4) Mask32x4

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x8) LessEqualMasked(y Float32x8, mask Mask32x8) Mask32x8

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x16) LessEqualMasked(y Float32x16, mask Mask32x16) Mask32x16

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x2) LessEqualMasked(y Float64x2, mask Mask64x2) Mask64x2

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x4) LessEqualMasked(y Float64x4, mask Mask64x4) Mask64x4

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x8) LessEqualMasked(y Float64x8, mask Mask64x8) Mask64x8

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x16) LessEqualMasked(y Int8x16, mask Mask8x16) Mask8x16

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x32) LessEqualMasked(y Int8x32, mask Mask8x32) Mask8x32

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x64) LessEqualMasked(y Int8x64, mask Mask8x64) Mask8x64

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x8) LessEqualMasked(y Int16x8, mask Mask16x8) Mask16x8

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x16) LessEqualMasked(y Int16x16, mask Mask16x16) Mask16x16

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x32) LessEqualMasked(y Int16x32, mask Mask16x32) Mask16x32

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x4) LessEqualMasked(y Int32x4, mask Mask32x4) Mask32x4

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x8) LessEqualMasked(y Int32x8, mask Mask32x8) Mask32x8

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x16) LessEqualMasked(y Int32x16, mask Mask32x16) Mask32x16

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x2) LessEqualMasked(y Int64x2, mask Mask64x2) Mask64x2

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x4) LessEqualMasked(y Int64x4, mask Mask64x4) Mask64x4

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x8) LessEqualMasked(y Int64x8, mask Mask64x8) Mask64x8

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x16) LessEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x32) LessEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x64) LessEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x8) LessEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x16) LessEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x32) LessEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x4) LessEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x8) LessEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x16) LessEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x2) LessEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x4) LessEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4

// LessEqualMasked compares for less than or equal.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x8) LessEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
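
// An illustrative sketch, not a generated declaration: chaining LessEqual
// with LessEqualMasked tests lo <= x <= hi per lane, mirroring the
// GreaterEqual example earlier in this file.
func exampleBetween(x, lo, hi Int32x4) Mask32x4 {
	le := x.LessEqual(hi)            // lanes where x <= hi
	return lo.LessEqualMasked(x, le) // of those, lanes where lo <= x
}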

/* LessMasked */

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x4) LessMasked(y Float32x4, mask Mask32x4) Mask32x4

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x8) LessMasked(y Float32x8, mask Mask32x8) Mask32x8

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPS, CPU Feature: AVX512F
func (x Float32x16) LessMasked(y Float32x16, mask Mask32x16) Mask32x16

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x2) LessMasked(y Float64x2, mask Mask64x2) Mask64x2

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x4) LessMasked(y Float64x4, mask Mask64x4) Mask64x4

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VCMPPD, CPU Feature: AVX512F
func (x Float64x8) LessMasked(y Float64x8, mask Mask64x8) Mask64x8

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x16) LessMasked(y Int8x16, mask Mask8x16) Mask8x16

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x32) LessMasked(y Int8x32, mask Mask8x32) Mask8x32

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPB, CPU Feature: AVX512BW
func (x Int8x64) LessMasked(y Int8x64, mask Mask8x64) Mask8x64

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x8) LessMasked(y Int16x8, mask Mask16x8) Mask16x8

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x16) LessMasked(y Int16x16, mask Mask16x16) Mask16x16

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPW, CPU Feature: AVX512BW
func (x Int16x32) LessMasked(y Int16x32, mask Mask16x32) Mask16x32

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x4) LessMasked(y Int32x4, mask Mask32x4) Mask32x4

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x8) LessMasked(y Int32x8, mask Mask32x8) Mask32x8

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPD, CPU Feature: AVX512F
func (x Int32x16) LessMasked(y Int32x16, mask Mask32x16) Mask32x16

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x2) LessMasked(y Int64x2, mask Mask64x2) Mask64x2

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x4) LessMasked(y Int64x4, mask Mask64x4) Mask64x4

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPQ, CPU Feature: AVX512F
func (x Int64x8) LessMasked(y Int64x8, mask Mask64x8) Mask64x8

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x16) LessMasked(y Uint8x16, mask Mask8x16) Mask8x16

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x32) LessMasked(y Uint8x32, mask Mask8x32) Mask8x32

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUB, CPU Feature: AVX512BW
func (x Uint8x64) LessMasked(y Uint8x64, mask Mask8x64) Mask8x64

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x8) LessMasked(y Uint16x8, mask Mask16x8) Mask16x8

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x16) LessMasked(y Uint16x16, mask Mask16x16) Mask16x16

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512BW
func (x Uint16x32) LessMasked(y Uint16x32, mask Mask16x32) Mask16x32

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x4) LessMasked(y Uint32x4, mask Mask32x4) Mask32x4

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x8) LessMasked(y Uint32x8, mask Mask32x8) Mask32x8

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512F
func (x Uint32x16) LessMasked(y Uint32x16, mask Mask32x16) Mask32x16

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x2) LessMasked(y Uint64x2, mask Mask64x2) Mask64x2

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x4) LessMasked(y Uint64x4, mask Mask64x4) Mask64x4

// LessMasked compares for less than.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x8) LessMasked(y Uint64x8, mask Mask64x8) Mask64x8
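
// An illustrative sketch, not a generated declaration: LessMasked refines a
// precomputed selection, evaluating the strict compare only in the lanes the
// mask already selects.
func exampleRefineSelection(x, y Uint16x8, sel Mask16x8) Mask16x8 {
	return x.LessMasked(y, sel) // lanes of sel where x < y
}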

/* Max */

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPS, CPU Feature: AVX
func (x Float32x4) Max(y Float32x4) Float32x4

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPS, CPU Feature: AVX
func (x Float32x8) Max(y Float32x8) Float32x8

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPS, CPU Feature: AVX512F
func (x Float32x16) Max(y Float32x16) Float32x16

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPD, CPU Feature: AVX
func (x Float64x2) Max(y Float64x2) Float64x2

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPD, CPU Feature: AVX
func (x Float64x4) Max(y Float64x4) Float64x4

// Max computes the maximum of corresponding elements.
//
// Asm: VMAXPD, CPU Feature: AVX512F
func (x Float64x8) Max(y Float64x8) Float64x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSB, CPU Feature: AVX
func (x Int8x16) Max(y Int8x16) Int8x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSB, CPU Feature: AVX2
func (x Int8x32) Max(y Int8x32) Int8x32

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSB, CPU Feature: AVX512BW
func (x Int8x64) Max(y Int8x64) Int8x64

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSW, CPU Feature: AVX
func (x Int16x8) Max(y Int16x8) Int16x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSW, CPU Feature: AVX2
func (x Int16x16) Max(y Int16x16) Int16x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSW, CPU Feature: AVX512BW
func (x Int16x32) Max(y Int16x32) Int16x32

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSD, CPU Feature: AVX
func (x Int32x4) Max(y Int32x4) Int32x4

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSD, CPU Feature: AVX2
func (x Int32x8) Max(y Int32x8) Int32x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSD, CPU Feature: AVX512F
func (x Int32x16) Max(y Int32x16) Int32x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSQ, CPU Feature: AVX512F
func (x Int64x2) Max(y Int64x2) Int64x2

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSQ, CPU Feature: AVX512F
func (x Int64x4) Max(y Int64x4) Int64x4

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXSQ, CPU Feature: AVX512F
func (x Int64x8) Max(y Int64x8) Int64x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUB, CPU Feature: AVX
func (x Uint8x16) Max(y Uint8x16) Uint8x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUB, CPU Feature: AVX2
func (x Uint8x32) Max(y Uint8x32) Uint8x32

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUB, CPU Feature: AVX512BW
func (x Uint8x64) Max(y Uint8x64) Uint8x64

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUW, CPU Feature: AVX
func (x Uint16x8) Max(y Uint16x8) Uint16x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUW, CPU Feature: AVX2
func (x Uint16x16) Max(y Uint16x16) Uint16x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUW, CPU Feature: AVX512BW
func (x Uint16x32) Max(y Uint16x32) Uint16x32

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUD, CPU Feature: AVX
func (x Uint32x4) Max(y Uint32x4) Uint32x4

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUD, CPU Feature: AVX2
func (x Uint32x8) Max(y Uint32x8) Uint32x8

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUD, CPU Feature: AVX512F
func (x Uint32x16) Max(y Uint32x16) Uint32x16

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUQ, CPU Feature: AVX512F
func (x Uint64x2) Max(y Uint64x2) Uint64x2

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUQ, CPU Feature: AVX512F
func (x Uint64x4) Max(y Uint64x4) Uint64x4

// Max computes the maximum of corresponding elements.
//
// Asm: VPMAXUQ, CPU Feature: AVX512F
func (x Uint64x8) Max(y Uint64x8) Uint64x8
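
// An illustrative sketch, not a generated declaration: Max composes with Min
// (declared later in this file) into the usual branch-free clamp idiom.
func exampleClamp(x, lo, hi Float32x4) Float32x4 {
	return x.Max(lo).Min(hi) // clamp each lane of x into [lo, hi]
}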

/* MaxMasked */

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMAXPS, CPU Feature: AVX512F
func (x Float32x4) MaxMasked(y Float32x4, mask Mask32x4) Float32x4

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMAXPS, CPU Feature: AVX512F
func (x Float32x8) MaxMasked(y Float32x8, mask Mask32x8) Float32x8

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMAXPS, CPU Feature: AVX512F
func (x Float32x16) MaxMasked(y Float32x16, mask Mask32x16) Float32x16

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMAXPD, CPU Feature: AVX512F
func (x Float64x2) MaxMasked(y Float64x2, mask Mask64x2) Float64x2

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMAXPD, CPU Feature: AVX512F
func (x Float64x4) MaxMasked(y Float64x4, mask Mask64x4) Float64x4

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMAXPD, CPU Feature: AVX512F
func (x Float64x8) MaxMasked(y Float64x8, mask Mask64x8) Float64x8

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSB, CPU Feature: AVX512BW
func (x Int8x16) MaxMasked(y Int8x16, mask Mask8x16) Int8x16

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSB, CPU Feature: AVX512BW
func (x Int8x32) MaxMasked(y Int8x32, mask Mask8x32) Int8x32

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSB, CPU Feature: AVX512BW
func (x Int8x64) MaxMasked(y Int8x64, mask Mask8x64) Int8x64

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSW, CPU Feature: AVX512BW
func (x Int16x8) MaxMasked(y Int16x8, mask Mask16x8) Int16x8

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSW, CPU Feature: AVX512BW
func (x Int16x16) MaxMasked(y Int16x16, mask Mask16x16) Int16x16

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSW, CPU Feature: AVX512BW
func (x Int16x32) MaxMasked(y Int16x32, mask Mask16x32) Int16x32

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSD, CPU Feature: AVX512F
func (x Int32x4) MaxMasked(y Int32x4, mask Mask32x4) Int32x4

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSD, CPU Feature: AVX512F
func (x Int32x8) MaxMasked(y Int32x8, mask Mask32x8) Int32x8

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSD, CPU Feature: AVX512F
func (x Int32x16) MaxMasked(y Int32x16, mask Mask32x16) Int32x16

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSQ, CPU Feature: AVX512F
func (x Int64x2) MaxMasked(y Int64x2, mask Mask64x2) Int64x2

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSQ, CPU Feature: AVX512F
func (x Int64x4) MaxMasked(y Int64x4, mask Mask64x4) Int64x4

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXSQ, CPU Feature: AVX512F
func (x Int64x8) MaxMasked(y Int64x8, mask Mask64x8) Int64x8

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUB, CPU Feature: AVX512BW
func (x Uint8x16) MaxMasked(y Uint8x16, mask Mask8x16) Uint8x16

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUB, CPU Feature: AVX512BW
func (x Uint8x32) MaxMasked(y Uint8x32, mask Mask8x32) Uint8x32

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUB, CPU Feature: AVX512BW
func (x Uint8x64) MaxMasked(y Uint8x64, mask Mask8x64) Uint8x64

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUW, CPU Feature: AVX512BW
func (x Uint16x8) MaxMasked(y Uint16x8, mask Mask16x8) Uint16x8

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUW, CPU Feature: AVX512BW
func (x Uint16x16) MaxMasked(y Uint16x16, mask Mask16x16) Uint16x16

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUW, CPU Feature: AVX512BW
func (x Uint16x32) MaxMasked(y Uint16x32, mask Mask16x32) Uint16x32

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUD, CPU Feature: AVX512F
func (x Uint32x4) MaxMasked(y Uint32x4, mask Mask32x4) Uint32x4

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUD, CPU Feature: AVX512F
func (x Uint32x8) MaxMasked(y Uint32x8, mask Mask32x8) Uint32x8

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUD, CPU Feature: AVX512F
func (x Uint32x16) MaxMasked(y Uint32x16, mask Mask32x16) Uint32x16

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUQ, CPU Feature: AVX512F
func (x Uint64x2) MaxMasked(y Uint64x2, mask Mask64x2) Uint64x2

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUQ, CPU Feature: AVX512F
func (x Uint64x4) MaxMasked(y Uint64x4, mask Mask64x4) Uint64x4

// MaxMasked computes the maximum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMAXUQ, CPU Feature: AVX512F
func (x Uint64x8) MaxMasked(y Uint64x8, mask Mask64x8) Uint64x8
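
// An illustrative sketch, not a generated declaration: MaxMasked computes the
// lane-wise maximum only where sel selects; what lands in the unselected
// result lanes is governed by the write-mask semantics noted above, so
// callers should not assume those lanes pass x through unchanged.
func exampleMaxWhere(x, y Int32x8, sel Mask32x8) Int32x8 {
	return x.MaxMasked(y, sel)
}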

/* Min */

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPS, CPU Feature: AVX
func (x Float32x4) Min(y Float32x4) Float32x4

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPS, CPU Feature: AVX
func (x Float32x8) Min(y Float32x8) Float32x8

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPS, CPU Feature: AVX512F
func (x Float32x16) Min(y Float32x16) Float32x16

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPD, CPU Feature: AVX
func (x Float64x2) Min(y Float64x2) Float64x2

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPD, CPU Feature: AVX
func (x Float64x4) Min(y Float64x4) Float64x4

// Min computes the minimum of corresponding elements.
//
// Asm: VMINPD, CPU Feature: AVX512F
func (x Float64x8) Min(y Float64x8) Float64x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSB, CPU Feature: AVX
func (x Int8x16) Min(y Int8x16) Int8x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSB, CPU Feature: AVX2
func (x Int8x32) Min(y Int8x32) Int8x32

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSB, CPU Feature: AVX512BW
func (x Int8x64) Min(y Int8x64) Int8x64

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSW, CPU Feature: AVX
func (x Int16x8) Min(y Int16x8) Int16x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSW, CPU Feature: AVX2
func (x Int16x16) Min(y Int16x16) Int16x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSW, CPU Feature: AVX512BW
func (x Int16x32) Min(y Int16x32) Int16x32

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSD, CPU Feature: AVX
func (x Int32x4) Min(y Int32x4) Int32x4

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSD, CPU Feature: AVX2
func (x Int32x8) Min(y Int32x8) Int32x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSD, CPU Feature: AVX512F
func (x Int32x16) Min(y Int32x16) Int32x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSQ, CPU Feature: AVX512F
func (x Int64x2) Min(y Int64x2) Int64x2

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSQ, CPU Feature: AVX512F
func (x Int64x4) Min(y Int64x4) Int64x4

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINSQ, CPU Feature: AVX512F
func (x Int64x8) Min(y Int64x8) Int64x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUB, CPU Feature: AVX
func (x Uint8x16) Min(y Uint8x16) Uint8x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUB, CPU Feature: AVX2
func (x Uint8x32) Min(y Uint8x32) Uint8x32

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUB, CPU Feature: AVX512BW
func (x Uint8x64) Min(y Uint8x64) Uint8x64

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUW, CPU Feature: AVX
func (x Uint16x8) Min(y Uint16x8) Uint16x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUW, CPU Feature: AVX2
func (x Uint16x16) Min(y Uint16x16) Uint16x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUW, CPU Feature: AVX512BW
func (x Uint16x32) Min(y Uint16x32) Uint16x32

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUD, CPU Feature: AVX
func (x Uint32x4) Min(y Uint32x4) Uint32x4

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUD, CPU Feature: AVX2
func (x Uint32x8) Min(y Uint32x8) Uint32x8

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUD, CPU Feature: AVX512F
func (x Uint32x16) Min(y Uint32x16) Uint32x16

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUQ, CPU Feature: AVX512F
func (x Uint64x2) Min(y Uint64x2) Uint64x2

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUQ, CPU Feature: AVX512F
func (x Uint64x4) Min(y Uint64x4) Uint64x4

// Min computes the minimum of corresponding elements.
//
// Asm: VPMINUQ, CPU Feature: AVX512F
func (x Uint64x8) Min(y Uint64x8) Uint64x8
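
// An illustrative sketch, not a generated declaration: for unsigned bytes,
// Min lowers to VPMINUB per the Asm line above, giving a per-byte unsigned
// minimum without any branching.
func exampleByteFloor(a, b Uint8x16) Uint8x16 {
	return a.Min(b) // per-byte unsigned minimum
}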

/* MinMasked */

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMINPS, CPU Feature: AVX512F
func (x Float32x4) MinMasked(y Float32x4, mask Mask32x4) Float32x4

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMINPS, CPU Feature: AVX512F
func (x Float32x8) MinMasked(y Float32x8, mask Mask32x8) Float32x8

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMINPS, CPU Feature: AVX512F
func (x Float32x16) MinMasked(y Float32x16, mask Mask32x16) Float32x16

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMINPD, CPU Feature: AVX512F
func (x Float64x2) MinMasked(y Float64x2, mask Mask64x2) Float64x2

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMINPD, CPU Feature: AVX512F
func (x Float64x4) MinMasked(y Float64x4, mask Mask64x4) Float64x4

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VMINPD, CPU Feature: AVX512F
func (x Float64x8) MinMasked(y Float64x8, mask Mask64x8) Float64x8

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSB, CPU Feature: AVX512BW
func (x Int8x16) MinMasked(y Int8x16, mask Mask8x16) Int8x16

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSB, CPU Feature: AVX512BW
func (x Int8x32) MinMasked(y Int8x32, mask Mask8x32) Int8x32

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSB, CPU Feature: AVX512BW
func (x Int8x64) MinMasked(y Int8x64, mask Mask8x64) Int8x64

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSW, CPU Feature: AVX512BW
func (x Int16x8) MinMasked(y Int16x8, mask Mask16x8) Int16x8

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSW, CPU Feature: AVX512BW
func (x Int16x16) MinMasked(y Int16x16, mask Mask16x16) Int16x16

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSW, CPU Feature: AVX512BW
func (x Int16x32) MinMasked(y Int16x32, mask Mask16x32) Int16x32

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSD, CPU Feature: AVX512F
func (x Int32x4) MinMasked(y Int32x4, mask Mask32x4) Int32x4

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSD, CPU Feature: AVX512F
func (x Int32x8) MinMasked(y Int32x8, mask Mask32x8) Int32x8

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSD, CPU Feature: AVX512F
func (x Int32x16) MinMasked(y Int32x16, mask Mask32x16) Int32x16

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSQ, CPU Feature: AVX512F
func (x Int64x2) MinMasked(y Int64x2, mask Mask64x2) Int64x2

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSQ, CPU Feature: AVX512F
func (x Int64x4) MinMasked(y Int64x4, mask Mask64x4) Int64x4

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINSQ, CPU Feature: AVX512F
func (x Int64x8) MinMasked(y Int64x8, mask Mask64x8) Int64x8

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUB, CPU Feature: AVX512BW
func (x Uint8x16) MinMasked(y Uint8x16, mask Mask8x16) Uint8x16

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUB, CPU Feature: AVX512BW
func (x Uint8x32) MinMasked(y Uint8x32, mask Mask8x32) Uint8x32

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUB, CPU Feature: AVX512BW
func (x Uint8x64) MinMasked(y Uint8x64, mask Mask8x64) Uint8x64

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUW, CPU Feature: AVX512BW
func (x Uint16x8) MinMasked(y Uint16x8, mask Mask16x8) Uint16x8

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUW, CPU Feature: AVX512BW
func (x Uint16x16) MinMasked(y Uint16x16, mask Mask16x16) Uint16x16

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUW, CPU Feature: AVX512BW
func (x Uint16x32) MinMasked(y Uint16x32, mask Mask16x32) Uint16x32

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUD, CPU Feature: AVX512F
func (x Uint32x4) MinMasked(y Uint32x4, mask Mask32x4) Uint32x4

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUD, CPU Feature: AVX512F
func (x Uint32x8) MinMasked(y Uint32x8, mask Mask32x8) Uint32x8

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUD, CPU Feature: AVX512F
func (x Uint32x16) MinMasked(y Uint32x16, mask Mask32x16) Uint32x16

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUQ, CPU Feature: AVX512F
func (x Uint64x2) MinMasked(y Uint64x2, mask Mask64x2) Uint64x2

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUQ, CPU Feature: AVX512F
func (x Uint64x4) MinMasked(y Uint64x4, mask Mask64x4) Uint64x4

// MinMasked computes the minimum of corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMINUQ, CPU Feature: AVX512F
func (x Uint64x8) MinMasked(y Uint64x8, mask Mask64x8) Uint64x8
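
// An illustrative sketch, not a generated declaration: MinMasked evaluates
// the lane-wise minimum only under sel; as with MaxMasked above, the content
// of unselected result lanes follows the write-mask semantics, so this
// sketch makes no assumption about them.
func exampleMinWhere(x, y Uint16x16, sel Mask16x16) Uint16x16 {
	return x.MinMasked(y, sel)
}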

/* Mul */

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPS, CPU Feature: AVX
func (x Float32x4) Mul(y Float32x4) Float32x4

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPS, CPU Feature: AVX
func (x Float32x8) Mul(y Float32x8) Float32x8

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPS, CPU Feature: AVX512F
func (x Float32x16) Mul(y Float32x16) Float32x16

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPD, CPU Feature: AVX
func (x Float64x2) Mul(y Float64x2) Float64x2

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPD, CPU Feature: AVX
func (x Float64x4) Mul(y Float64x4) Float64x4

// Mul multiplies corresponding elements of two vectors.
//
// Asm: VMULPD, CPU Feature: AVX512F
func (x Float64x8) Mul(y Float64x8) Float64x8
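
// An illustrative sketch, not a generated declaration: Mul is the
// element-wise float product; unlike the integer multiplies declared below,
// there is no high/low split because each float lane yields a same-width
// float result.
func exampleScale(v, s Float32x8) Float32x8 {
	return v.Mul(s) // lane-wise v * s
}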

/* MulByPowOf2 */

// MulByPowOf2 multiplies elements by a power of 2.
//
// Asm: VSCALEFPS, CPU Feature: AVX512F
func (x Float32x4) MulByPowOf2(y Float32x4) Float32x4

// MulByPowOf2 multiplies elements by a power of 2.
//
// Asm: VSCALEFPS, CPU Feature: AVX512F
func (x Float32x8) MulByPowOf2(y Float32x8) Float32x8

// MulByPowOf2 multiplies elements by a power of 2.
//
// Asm: VSCALEFPS, CPU Feature: AVX512F
func (x Float32x16) MulByPowOf2(y Float32x16) Float32x16

// MulByPowOf2 multiplies elements by a power of 2.
//
// Asm: VSCALEFPD, CPU Feature: AVX512F
func (x Float64x2) MulByPowOf2(y Float64x2) Float64x2

// MulByPowOf2 multiplies elements by a power of 2.
//
// Asm: VSCALEFPD, CPU Feature: AVX512F
func (x Float64x4) MulByPowOf2(y Float64x4) Float64x4

// MulByPowOf2 multiplies elements by a power of 2.
//
// Asm: VSCALEFPD, CPU Feature: AVX512F
func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8

/* MulByPowOf2Masked */

// MulByPowOf2Masked multiplies elements by a power of 2.
//
// This operation is applied selectively under a write mask.
//
// Asm: VSCALEFPS, CPU Feature: AVX512F
func (x Float32x4) MulByPowOf2Masked(y Float32x4, mask Mask32x4) Float32x4

// MulByPowOf2Masked multiplies elements by a power of 2.
//
// This operation is applied selectively under a write mask.
//
// Asm: VSCALEFPS, CPU Feature: AVX512F
func (x Float32x8) MulByPowOf2Masked(y Float32x8, mask Mask32x8) Float32x8

// MulByPowOf2Masked multiplies elements by a power of 2.
//
// This operation is applied selectively under a write mask.
//
// Asm: VSCALEFPS, CPU Feature: AVX512F
func (x Float32x16) MulByPowOf2Masked(y Float32x16, mask Mask32x16) Float32x16

// MulByPowOf2Masked multiplies elements by a power of 2.
//
// This operation is applied selectively under a write mask.
//
// Asm: VSCALEFPD, CPU Feature: AVX512F
func (x Float64x2) MulByPowOf2Masked(y Float64x2, mask Mask64x2) Float64x2

// MulByPowOf2Masked multiplies elements by a power of 2.
//
// This operation is applied selectively under a write mask.
//
// Asm: VSCALEFPD, CPU Feature: AVX512F
func (x Float64x4) MulByPowOf2Masked(y Float64x4, mask Mask64x4) Float64x4

// MulByPowOf2Masked multiplies elements by a power of 2.
//
// This operation is applied selectively under a write mask.
//
// Asm: VSCALEFPD, CPU Feature: AVX512F
func (x Float64x8) MulByPowOf2Masked(y Float64x8, mask Mask64x8) Float64x8

/* MulEvenWiden */

// MulEvenWiden multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULDQ, CPU Feature: AVX
func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2

// MulEvenWiden multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULDQ, CPU Feature: AVX2
func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4

// MulEvenWiden multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULDQ, CPU Feature: AVX512F
func (x Int64x2) MulEvenWiden(y Int64x2) Int64x2

// MulEvenWiden multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULDQ, CPU Feature: AVX512F
func (x Int64x4) MulEvenWiden(y Int64x4) Int64x4

// MulEvenWiden multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULDQ, CPU Feature: AVX512F
func (x Int64x8) MulEvenWiden(y Int64x8) Int64x8

// MulEvenWiden multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULUDQ, CPU Feature: AVX
func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2

// MulEvenWiden multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULUDQ, CPU Feature: AVX2
func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4

// MulEvenWiden multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULUDQ, CPU Feature: AVX512F
func (x Uint64x2) MulEvenWiden(y Uint64x2) Uint64x2

// MulEvenWiden multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULUDQ, CPU Feature: AVX512F
func (x Uint64x4) MulEvenWiden(y Uint64x4) Uint64x4

// MulEvenWiden multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
//
// Asm: VPMULUDQ, CPU Feature: AVX512F
func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8
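
// An illustrative sketch, not a generated declaration: per the doc comments
// above, MulEvenWiden consumes the even-indexed 32-bit lanes and produces
// full-width 64-bit products, so a Uint32x4 input pair yields a Uint64x2
// result holding the products of lanes 0 and 2.
func exampleWidenProducts(a, b Uint32x4) Uint64x2 {
	return a.MulEvenWiden(b) // a[0]*b[0] and a[2]*b[2], each 64 bits wide
}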
// // Asm: VPMULUDQ, CPU Feature: AVX512F func (x Uint64x4) MulEvenWiden(y Uint64x4) Uint64x4 // MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = x.Even[i] * y.Even[i]. // // Asm: VPMULUDQ, CPU Feature: AVX512F func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8 /* MulEvenWidenMasked */ // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = x.Even[i] * y.Even[i]. // // This operation is applied selectively under a write mask. // // Asm: VPMULDQ, CPU Feature: AVX512F func (x Int64x2) MulEvenWidenMasked(y Int64x2, mask Mask64x2) Int64x2 // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = x.Even[i] * y.Even[i]. // // This operation is applied selectively under a write mask. // // Asm: VPMULDQ, CPU Feature: AVX512F func (x Int64x4) MulEvenWidenMasked(y Int64x4, mask Mask64x4) Int64x4 // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = x.Even[i] * y.Even[i]. // // This operation is applied selectively under a write mask. // // Asm: VPMULDQ, CPU Feature: AVX512F func (x Int64x8) MulEvenWidenMasked(y Int64x8, mask Mask64x8) Int64x8 // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = x.Even[i] * y.Even[i]. // // This operation is applied selectively under a write mask. // // Asm: VPMULUDQ, CPU Feature: AVX512F func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, mask Mask64x2) Uint64x2 // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = x.Even[i] * y.Even[i]. // // This operation is applied selectively under a write mask. // // Asm: VPMULUDQ, CPU Feature: AVX512F func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, mask Mask64x4) Uint64x4 // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = x.Even[i] * y.Even[i]. // // This operation is applied selectively under a write mask. // // Asm: VPMULUDQ, CPU Feature: AVX512F func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* MulHigh */ // MulHigh multiplies elements and stores the high part of the result. // // Asm: VPMULHW, CPU Feature: AVX func (x Int16x8) MulHigh(y Int16x8) Int16x8 // MulHigh multiplies elements and stores the high part of the result. // // Asm: VPMULHW, CPU Feature: AVX2 func (x Int16x16) MulHigh(y Int16x16) Int16x16 // MulHigh multiplies elements and stores the high part of the result. // // Asm: VPMULHW, CPU Feature: AVX512BW func (x Int16x32) MulHigh(y Int16x32) Int16x32 // MulHigh multiplies elements and stores the high part of the result. // // Asm: VPMULHUW, CPU Feature: AVX func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8 // MulHigh multiplies elements and stores the high part of the result. // // Asm: VPMULHUW, CPU Feature: AVX2 func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16 // MulHigh multiplies elements and stores the high part of the result. // // Asm: VPMULHUW, CPU Feature: AVX512BW func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32 /* MulHighMasked */ // MulHighMasked multiplies elements and stores the high part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULHW, CPU Feature: AVX512BW func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8 // MulHighMasked multiplies elements and stores the high part of the result. // // This operation is applied selectively under a write mask. 
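// Scalar sketches of one MulHigh lane and one MulLow lane for int16
// (MulLow is declared below): the full 32-bit product is formed, then
// either the upper or the lower 16 bits are kept. Illustrative helpers,
// not part of the API; the unsigned variants are analogous.
func mulHighRef(x, y int16) int16 {
	return int16((int32(x) * int32(y)) >> 16) // upper half of the product
}

func mulLowRef(x, y int16) int16 {
	return int16(int32(x) * int32(y)) // lower half of the product
}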
// // Asm: VPMULHW, CPU Feature: AVX512BW func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16 // MulHighMasked multiplies elements and stores the high part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULHW, CPU Feature: AVX512BW func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32 // MulHighMasked multiplies elements and stores the high part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULHUW, CPU Feature: AVX512BW func (x Uint16x8) MulHighMasked(y Uint16x8, mask Mask16x8) Uint16x8 // MulHighMasked multiplies elements and stores the high part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULHUW, CPU Feature: AVX512BW func (x Uint16x16) MulHighMasked(y Uint16x16, mask Mask16x16) Uint16x16 // MulHighMasked multiplies elements and stores the high part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULHUW, CPU Feature: AVX512BW func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32 /* MulLow */ // MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLW, CPU Feature: AVX func (x Int16x8) MulLow(y Int16x8) Int16x8 // MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLW, CPU Feature: AVX2 func (x Int16x16) MulLow(y Int16x16) Int16x16 // MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLW, CPU Feature: AVX512BW func (x Int16x32) MulLow(y Int16x32) Int16x32 // MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLD, CPU Feature: AVX func (x Int32x4) MulLow(y Int32x4) Int32x4 // MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLD, CPU Feature: AVX2 func (x Int32x8) MulLow(y Int32x8) Int32x8 // MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLD, CPU Feature: AVX512F func (x Int32x16) MulLow(y Int32x16) Int32x16 // MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x2) MulLow(y Int64x2) Int64x2 // MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x4) MulLow(y Int64x4) Int64x4 // MulLow multiplies elements and stores the low part of the result. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x8) MulLow(y Int64x8) Int64x8 /* MulLowMasked */ // MulLowMasked multiplies elements and stores the low part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULLW, CPU Feature: AVX512BW func (x Int16x8) MulLowMasked(y Int16x8, mask Mask16x8) Int16x8 // MulLowMasked multiplies elements and stores the low part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULLW, CPU Feature: AVX512BW func (x Int16x16) MulLowMasked(y Int16x16, mask Mask16x16) Int16x16 // MulLowMasked multiplies elements and stores the low part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULLW, CPU Feature: AVX512BW func (x Int16x32) MulLowMasked(y Int16x32, mask Mask16x32) Int16x32 // MulLowMasked multiplies elements and stores the low part of the result. // // This operation is applied selectively under a write mask. 
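// All Masked variants in this file share one per-lane pattern: the
// operation is performed only in lanes whose mask bit is set. A scalar
// sketch using MulLowMasked's shape, assuming unselected result lanes
// are zeroed (zeroing rather than merging semantics is an assumption of
// this sketch, and the helper is illustrative, not part of the API):
func mulLowMaskedRef(x, y []int16, active []bool) []int16 {
	r := make([]int16, len(x))
	for i := range r {
		if active[i] {
			r[i] = int16(int32(x[i]) * int32(y[i])) // inactive lanes stay zero
		}
	}
	return r
}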
// // Asm: VPMULLD, CPU Feature: AVX512F func (x Int32x4) MulLowMasked(y Int32x4, mask Mask32x4) Int32x4 // MulLowMasked multiplies elements and stores the low part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULLD, CPU Feature: AVX512F func (x Int32x8) MulLowMasked(y Int32x8, mask Mask32x8) Int32x8 // MulLowMasked multiplies elements and stores the low part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULLD, CPU Feature: AVX512F func (x Int32x16) MulLowMasked(y Int32x16, mask Mask32x16) Int32x16 // MulLowMasked multiplies elements and stores the low part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x2) MulLowMasked(y Int64x2, mask Mask64x2) Int64x2 // MulLowMasked multiplies elements and stores the low part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x4) MulLowMasked(y Int64x4, mask Mask64x4) Int64x4 // MulLowMasked multiplies elements and stores the low part of the result. // // This operation is applied selectively under a write mask. // // Asm: VPMULLQ, CPU Feature: AVX512DQ func (x Int64x8) MulLowMasked(y Int64x8, mask Mask64x8) Int64x8 /* MulMasked */ // MulMasked multiplies corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VMULPS, CPU Feature: AVX512F func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4 // MulMasked multiplies corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VMULPS, CPU Feature: AVX512F func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8 // MulMasked multiplies corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VMULPS, CPU Feature: AVX512F func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16 // MulMasked multiplies corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VMULPD, CPU Feature: AVX512F func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2 // MulMasked multiplies corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VMULPD, CPU Feature: AVX512F func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4 // MulMasked multiplies corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VMULPD, CPU Feature: AVX512F func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8 /* NotEqual */ // NotEqual compares for inequality. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x4) NotEqual(y Float32x4) Mask32x4 // NotEqual compares for inequality. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) NotEqual(y Float32x8) Mask32x8 // NotEqual compares for inequality. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) NotEqual(y Float32x16) Mask32x16 // NotEqual compares for inequality. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x2) NotEqual(y Float64x2) Mask64x2 // NotEqual compares for inequality. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) NotEqual(y Float64x4) Mask64x4 // NotEqual compares for inequality. 
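// NotEqual returns a mask rather than a vector: lane i of the result is
// set exactly when x[i] != y[i]. A scalar sketch with a bool slice
// standing in for the mask types; for floating point, a NaN lane
// compares unequal to everything, matching Go's != on floats (the
// helper is illustrative, not part of the API).
func notEqualRef(x, y []float32) []bool {
	m := make([]bool, len(x))
	for i := range m {
		m[i] = x[i] != y[i]
	}
	return m
}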
// // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) NotEqual(y Float64x8) Mask64x8 // NotEqual compares for inequality. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) NotEqual(y Int8x16) Mask8x16 // NotEqual compares for inequality. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) NotEqual(y Int8x32) Mask8x32 // NotEqual compares for inequality. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) NotEqual(y Int8x64) Mask8x64 // NotEqual compares for inequality. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) NotEqual(y Int16x8) Mask16x8 // NotEqual compares for inequality. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) NotEqual(y Int16x16) Mask16x16 // NotEqual compares for inequality. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) NotEqual(y Int16x32) Mask16x32 // NotEqual compares for inequality. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) NotEqual(y Int32x4) Mask32x4 // NotEqual compares for inequality. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) NotEqual(y Int32x8) Mask32x8 // NotEqual compares for inequality. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) NotEqual(y Int32x16) Mask32x16 // NotEqual compares for inequality. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) NotEqual(y Int64x2) Mask64x2 // NotEqual compares for inequality. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) NotEqual(y Int64x4) Mask64x4 // NotEqual compares for inequality. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) NotEqual(y Int64x8) Mask64x8 // NotEqual compares for inequality. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 // NotEqual compares for inequality. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 // NotEqual compares for inequality. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64 // NotEqual compares for inequality. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 // NotEqual compares for inequality. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 // NotEqual compares for inequality. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32 // NotEqual compares for inequality. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 // NotEqual compares for inequality. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 // NotEqual compares for inequality. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16 // NotEqual compares for inequality. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 // NotEqual compares for inequality. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 // NotEqual compares for inequality. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8 /* NotEqualMasked */ // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x4) NotEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. 
// // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x8) NotEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPS, CPU Feature: AVX512F func (x Float32x16) NotEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x2) NotEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x4) NotEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VCMPPD, CPU Feature: AVX512F func (x Float64x8) NotEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x16) NotEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPB, CPU Feature: AVX512BW func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPW, CPU Feature: AVX512BW func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPD, CPU Feature: AVX512F func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. 
// // Asm: VPCMPQ, CPU Feature: AVX512F func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUB, CPU Feature: AVX512BW func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUW, CPU Feature: AVX512BW func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUD, CPU Feature: AVX512F func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 // NotEqualMasked compares for inequality. // // This operation is applied selectively under a write mask. // // Asm: VPCMPUQ, CPU Feature: AVX512F func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 /* Or */ // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX func (x Int8x16) Or(y Int8x16) Int8x16 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX2 func (x Int8x32) Or(y Int8x32) Int8x32 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX func (x Int16x8) Or(y Int16x8) Int16x8 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX2 func (x Int16x16) Or(y Int16x16) Int16x16 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX func (x Int32x4) Or(y Int32x4) Int32x4 // Or performs a bitwise OR operation between two vectors. 
// // Asm: VPOR, CPU Feature: AVX2 func (x Int32x8) Or(y Int32x8) Int32x8 // Or performs a bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512F func (x Int32x16) Or(y Int32x16) Int32x16 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX func (x Int64x2) Or(y Int64x2) Int64x2 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX2 func (x Int64x4) Or(y Int64x4) Int64x4 // Or performs a bitwise OR operation between two vectors. // // Asm: VPORQ, CPU Feature: AVX512F func (x Int64x8) Or(y Int64x8) Int64x8 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX func (x Uint8x16) Or(y Uint8x16) Uint8x16 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX2 func (x Uint8x32) Or(y Uint8x32) Uint8x32 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX func (x Uint16x8) Or(y Uint16x8) Uint16x8 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX2 func (x Uint16x16) Or(y Uint16x16) Uint16x16 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX func (x Uint32x4) Or(y Uint32x4) Uint32x4 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX2 func (x Uint32x8) Or(y Uint32x8) Uint32x8 // Or performs a bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512F func (x Uint32x16) Or(y Uint32x16) Uint32x16 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX func (x Uint64x2) Or(y Uint64x2) Uint64x2 // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX2 func (x Uint64x4) Or(y Uint64x4) Uint64x4 // Or performs a bitwise OR operation between two vectors. // // Asm: VPORQ, CPU Feature: AVX512F func (x Uint64x8) Or(y Uint64x8) Uint64x8 /* OrMasked */ // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Int32x4) OrMasked(y Int32x4, mask Mask32x4) Int32x4 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Int32x8) OrMasked(y Int32x8, mask Mask32x8) Int32x8 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Int32x16) OrMasked(y Int32x16, mask Mask32x16) Int32x16 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Int64x2) OrMasked(y Int64x2, mask Mask64x2) Int64x2 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Int64x4) OrMasked(y Int64x4, mask Mask64x4) Int64x4 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Int64x8) OrMasked(y Int64x8, mask Mask64x8) Int64x8 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. 
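// Or is a plain lane-wise bitwise OR; the element width affects only how
// the vector is typed, not the bits produced. A scalar sketch
// (illustrative helper, not part of the API):
func orRef(x, y []uint32) []uint32 {
	r := make([]uint32, len(x))
	for i := range r {
		r[i] = x[i] | y[i]
	}
	return r
}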
// // Asm: VPORD, CPU Feature: AVX512F func (x Uint32x4) OrMasked(y Uint32x4, mask Mask32x4) Uint32x4 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Uint32x8) OrMasked(y Uint32x8, mask Mask32x8) Uint32x8 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORD, CPU Feature: AVX512F func (x Uint32x16) OrMasked(y Uint32x16, mask Mask32x16) Uint32x16 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4 // OrMasked performs a bitwise OR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPORQ, CPU Feature: AVX512F func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* PairDotProd */ // PairDotProd multiplies the elements and adds the pairs together, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDWD, CPU Feature: AVX func (x Int16x8) PairDotProd(y Int16x8) Int32x4 // PairDotProd multiplies the elements and adds the pairs together, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDWD, CPU Feature: AVX2 func (x Int16x16) PairDotProd(y Int16x16) Int32x8 // PairDotProd multiplies the elements and adds the pairs together, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDWD, CPU Feature: AVX512BW func (x Int16x32) PairDotProd(y Int16x32) Int32x16 /* PairDotProdAccumulate */ // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. // // Asm: VPDPWSSD, CPU Feature: AVXVNNI func (x Int16x8) PairDotProdAccumulate(y Int16x8, z Int32x4) Int32x4 // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. // // Asm: VPDPWSSD, CPU Feature: AVXVNNI func (x Int16x16) PairDotProdAccumulate(y Int16x16, z Int32x8) Int32x8 // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. // // Asm: VPDPWSSD, CPU Feature: AVX512VNNI func (x Int16x32) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16 /* PairDotProdAccumulateMasked */ // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. // // This operation is applied selectively under a write mask. // // Asm: VPDPWSSD, CPU Feature: AVX512VNNI func (x Int16x8) PairDotProdAccumulateMasked(y Int16x8, z Int32x4, mask Mask32x4) Int32x4 // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. // // This operation is applied selectively under a write mask. // // Asm: VPDPWSSD, CPU Feature: AVX512VNNI func (x Int16x16) PairDotProdAccumulateMasked(y Int16x16, z Int32x8, mask Mask32x8) Int32x8 // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. // // This operation is applied selectively under a write mask. 
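// A scalar sketch of PairDotProd on int16 lanes: adjacent pairs are
// multiplied elementwise and the two products summed into one 32-bit
// lane, halving the lane count. Illustrative helper, not part of the API.
func pairDotProdRef(x, y [8]int16) [4]int32 {
	var r [4]int32
	for i := range r {
		r[i] = int32(x[2*i])*int32(y[2*i]) + int32(x[2*i+1])*int32(y[2*i+1])
	}
	return r
}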
// // Asm: VPDPWSSD, CPU Feature: AVX512VNNI func (x Int16x32) PairDotProdAccumulateMasked(y Int16x32, z Int32x16, mask Mask32x16) Int32x16 /* PairDotProdMasked */ // PairDotProdMasked multiplies the elements and adds the pairs together, // yielding a vector of half as many elements with twice the input element size. // // This operation is applied selectively under a write mask. // // Asm: VPMADDWD, CPU Feature: AVX512BW func (x Int16x8) PairDotProdMasked(y Int16x8, mask Mask16x8) Int32x4 // PairDotProdMasked multiplies the elements and adds the pairs together, // yielding a vector of half as many elements with twice the input element size. // // This operation is applied selectively under a write mask. // // Asm: VPMADDWD, CPU Feature: AVX512BW func (x Int16x16) PairDotProdMasked(y Int16x16, mask Mask16x16) Int32x8 // PairDotProdMasked multiplies the elements and adds the pairs together, // yielding a vector of half as many elements with twice the input element size. // // This operation is applied selectively under a write mask. // // Asm: VPMADDWD, CPU Feature: AVX512BW func (x Int16x32) PairDotProdMasked(y Int16x32, mask Mask16x32) Int32x16 /* PairwiseAdd */ // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VHADDPS, CPU Feature: AVX func (x Float32x4) PairwiseAdd(y Float32x4) Float32x4 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VHADDPS, CPU Feature: AVX func (x Float32x8) PairwiseAdd(y Float32x8) Float32x8 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VHADDPD, CPU Feature: AVX func (x Float64x2) PairwiseAdd(y Float64x2) Float64x2 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VHADDPD, CPU Feature: AVX func (x Float64x4) PairwiseAdd(y Float64x4) Float64x4 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VPHADDW, CPU Feature: AVX func (x Int16x8) PairwiseAdd(y Int16x8) Int16x8 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VPHADDW, CPU Feature: AVX2 func (x Int16x16) PairwiseAdd(y Int16x16) Int16x16 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VPHADDD, CPU Feature: AVX func (x Int32x4) PairwiseAdd(y Int32x4) Int32x4 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VPHADDD, CPU Feature: AVX2 func (x Int32x8) PairwiseAdd(y Int32x8) Int32x8 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. 
// // Asm: VPHADDW, CPU Feature: AVX func (x Uint16x8) PairwiseAdd(y Uint16x8) Uint16x8 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VPHADDW, CPU Feature: AVX2 func (x Uint16x16) PairwiseAdd(y Uint16x16) Uint16x16 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VPHADDD, CPU Feature: AVX func (x Uint32x4) PairwiseAdd(y Uint32x4) Uint32x4 // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. // // Asm: VPHADDD, CPU Feature: AVX2 func (x Uint32x8) PairwiseAdd(y Uint32x8) Uint32x8 /* PairwiseSub */ // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VHSUBPS, CPU Feature: AVX func (x Float32x4) PairwiseSub(y Float32x4) Float32x4 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VHSUBPS, CPU Feature: AVX func (x Float32x8) PairwiseSub(y Float32x8) Float32x8 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VHSUBPD, CPU Feature: AVX func (x Float64x2) PairwiseSub(y Float64x2) Float64x2 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VHSUBPD, CPU Feature: AVX func (x Float64x4) PairwiseSub(y Float64x4) Float64x4 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VPHSUBW, CPU Feature: AVX func (x Int16x8) PairwiseSub(y Int16x8) Int16x8 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VPHSUBW, CPU Feature: AVX2 func (x Int16x16) PairwiseSub(y Int16x16) Int16x16 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VPHSUBD, CPU Feature: AVX func (x Int32x4) PairwiseSub(y Int32x4) Int32x4 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VPHSUBD, CPU Feature: AVX2 func (x Int32x8) PairwiseSub(y Int32x8) Int32x8 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VPHSUBW, CPU Feature: AVX func (x Uint16x8) PairwiseSub(y Uint16x8) Uint16x8 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
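// A scalar sketch of the documented PairwiseAdd lane order on 4-lane
// vectors: sums of y's adjacent pairs fill the low half of the result
// and sums of x's adjacent pairs fill the high half; PairwiseSub has the
// same shape with subtraction. Illustrative helper, not part of the API.
func pairwiseAddRef(x, y [4]float32) [4]float32 {
	return [4]float32{y[0] + y[1], y[2] + y[3], x[0] + x[1], x[2] + x[3]}
}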
// // Asm: VPHSUBW, CPU Feature: AVX2 func (x Uint16x16) PairwiseSub(y Uint16x16) Uint16x16 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VPHSUBD, CPU Feature: AVX func (x Uint32x4) PairwiseSub(y Uint32x4) Uint32x4 // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. // // Asm: VPHSUBD, CPU Feature: AVX2 func (x Uint32x8) PairwiseSub(y Uint32x8) Uint32x8 /* Permute */ // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x16) Permute(indices Uint8x16) Int8x16 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x16) Permute(indices Uint8x16) Uint8x16 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x32) Permute(indices Uint8x32) Int8x32 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x32) Permute(indices Uint8x32) Uint8x32 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x64) Permute(indices Uint8x64) Int8x64 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x64) Permute(indices Uint8x64) Uint8x64 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMW, CPU Feature: AVX512BW func (x Int16x8) Permute(indices Uint16x8) Int16x8 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMW, CPU Feature: AVX512BW func (x Uint16x8) Permute(indices Uint16x8) Uint16x8 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. 
// // Asm: VPERMW, CPU Feature: AVX512BW func (x Int16x16) Permute(indices Uint16x16) Int16x16 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMW, CPU Feature: AVX512BW func (x Uint16x16) Permute(indices Uint16x16) Uint16x16 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMW, CPU Feature: AVX512BW func (x Int16x32) Permute(indices Uint16x32) Int16x32 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMW, CPU Feature: AVX512BW func (x Uint16x32) Permute(indices Uint16x32) Uint16x32 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMPS, CPU Feature: AVX2 func (x Float32x8) Permute(indices Uint32x8) Float32x8 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMD, CPU Feature: AVX2 func (x Int32x8) Permute(indices Uint32x8) Int32x8 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMD, CPU Feature: AVX2 func (x Uint32x8) Permute(indices Uint32x8) Uint32x8 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMPS, CPU Feature: AVX512F func (x Float32x16) Permute(indices Uint32x16) Float32x16 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMD, CPU Feature: AVX512F func (x Int32x16) Permute(indices Uint32x16) Int32x16 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMD, CPU Feature: AVX512F func (x Uint32x16) Permute(indices Uint32x16) Uint32x16 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMPD, CPU Feature: AVX512F func (x Float64x4) Permute(indices Uint64x4) Float64x4 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. 
// // Asm: VPERMQ, CPU Feature: AVX512F func (x Int64x4) Permute(indices Uint64x4) Int64x4 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMQ, CPU Feature: AVX512F func (x Uint64x4) Permute(indices Uint64x4) Uint64x4 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMPD, CPU Feature: AVX512F func (x Float64x8) Permute(indices Uint64x8) Float64x8 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMQ, CPU Feature: AVX512F func (x Int64x8) Permute(indices Uint64x8) Int64x8 // Permute performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. // // Asm: VPERMQ, CPU Feature: AVX512F func (x Uint64x8) Permute(indices Uint64x8) Uint64x8 /* Permute2 */ // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Int8x16) Permute2(y Int8x16, indices Uint8x16) Int8x16 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Uint8x16) Permute2(y Uint8x16, indices Uint8x16) Uint8x16 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Int8x32) Permute2(y Int8x32, indices Uint8x32) Int8x32 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Uint8x32) Permute2(y Uint8x32, indices Uint8x32) Uint8x32 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Int8x64) Permute2(y Int8x64, indices Uint8x64) Int8x64 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. 
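// A scalar sketch of Permute on a 16-lane vector: each result lane
// selects an element of x by index, and only the low bits needed to
// address x's lanes are significant (here the low 4 bits). Illustrative
// helper, not part of the API.
func permuteRef(x [16]int8, indices [16]uint8) [16]int8 {
	var r [16]int8
	for i := range r {
		r[i] = x[indices[i]&15] // indices are taken modulo the lane count
	}
	return r
}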
// // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Uint8x64) Permute2(y Uint8x64, indices Uint8x64) Uint8x64 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Int16x8) Permute2(y Int16x8, indices Uint16x8) Int16x8 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Uint16x8) Permute2(y Uint16x8, indices Uint16x8) Uint16x8 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Int16x16) Permute2(y Int16x16, indices Uint16x16) Int16x16 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Uint16x16) Permute2(y Uint16x16, indices Uint16x16) Uint16x16 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Int16x32) Permute2(y Int16x32, indices Uint16x32) Int16x32 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2W, CPU Feature: AVX512BW func (x Uint16x32) Permute2(y Uint16x32, indices Uint16x32) Uint16x32 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PS, CPU Feature: AVX512F func (x Float32x4) Permute2(y Float32x4, indices Uint32x4) Float32x4 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2D, CPU Feature: AVX512F func (x Int32x4) Permute2(y Int32x4, indices Uint32x4) Int32x4 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2D, CPU Feature: AVX512F func (x Uint32x4) Permute2(y Uint32x4, indices Uint32x4) Uint32x4 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. 
// Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PS, CPU Feature: AVX512F func (x Float32x8) Permute2(y Float32x8, indices Uint32x8) Float32x8 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2D, CPU Feature: AVX512F func (x Int32x8) Permute2(y Int32x8, indices Uint32x8) Int32x8 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2D, CPU Feature: AVX512F func (x Uint32x8) Permute2(y Uint32x8, indices Uint32x8) Uint32x8 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PS, CPU Feature: AVX512F func (x Float32x16) Permute2(y Float32x16, indices Uint32x16) Float32x16 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2D, CPU Feature: AVX512F func (x Int32x16) Permute2(y Int32x16, indices Uint32x16) Int32x16 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2D, CPU Feature: AVX512F func (x Uint32x16) Permute2(y Uint32x16, indices Uint32x16) Uint32x16 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PD, CPU Feature: AVX512F func (x Float64x2) Permute2(y Float64x2, indices Uint64x2) Float64x2 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Int64x2) Permute2(y Int64x2, indices Uint64x2) Int64x2 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Uint64x2) Permute2(y Uint64x2, indices Uint64x2) Uint64x2 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. 
// // Asm: VPERMI2PD, CPU Feature: AVX512F func (x Float64x4) Permute2(y Float64x4, indices Uint64x4) Float64x4 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Int64x4) Permute2(y Int64x4, indices Uint64x4) Int64x4 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Uint64x4) Permute2(y Uint64x4, indices Uint64x4) Uint64x4 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2PD, CPU Feature: AVX512F func (x Float64x8) Permute2(y Float64x8, indices Uint64x8) Float64x8 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Int64x8) Permute2(y Int64x8, indices Uint64x8) Int64x8 // Permute2 performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // Asm: VPERMI2Q, CPU Feature: AVX512F func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8 /* Permute2Masked */ // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // This operation is applied selectively under a write mask. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, mask Mask8x16) Int8x16 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // This operation is applied selectively under a write mask. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, mask Mask8x16) Uint8x16 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // This operation is applied selectively under a write mask. // // Asm: VPERMI2B, CPU Feature: AVX512VBMI func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, mask Mask8x32) Int8x32 // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. // // This operation is applied selectively under a write mask. 
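// A scalar sketch of Permute2 on 16-lane vectors: x and y are treated as
// one 32-element table (x followed by y) and each result lane selects
// from that table, with only the low 5 index bits significant.
// Illustrative helper, not part of the API.
func permute2Ref(x, y [16]int8, indices [16]uint8) [16]int8 {
	var xy [32]int8
	copy(xy[:16], x[:])
	copy(xy[16:], y[:])
	var r [16]int8
	for i := range r {
		r[i] = xy[indices[i]&31]
	}
	return r
}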
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, mask Mask8x32) Uint8x32

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, mask Mask8x64) Int8x64

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2B, CPU Feature: AVX512VBMI
func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, mask Mask8x64) Uint8x64

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, mask Mask16x8) Int16x8

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, mask Mask16x8) Uint16x8

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, mask Mask16x16) Int16x16

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, mask Mask16x16) Uint16x16

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, mask Mask16x32) Int16x32

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2W, CPU Feature: AVX512BW
func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, mask Mask16x32) Uint16x32

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2PS, CPU Feature: AVX512F
func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, mask Mask32x4) Float32x4

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, mask Mask32x4) Int32x4

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, mask Mask32x4) Uint32x4

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2PS, CPU Feature: AVX512F
func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, mask Mask32x8) Float32x8

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, mask Mask32x8) Int32x8

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, mask Mask32x8) Uint32x8

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2PS, CPU Feature: AVX512F
func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, mask Mask32x16) Float32x16

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, mask Mask32x16) Int32x16

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2D, CPU Feature: AVX512F
func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, mask Mask32x16) Uint32x16

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2PD, CPU Feature: AVX512F
func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, mask Mask64x2) Float64x2

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, mask Mask64x2) Int64x2

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, mask Mask64x2) Uint64x2

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2PD, CPU Feature: AVX512F
func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, mask Mask64x4) Float64x4

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, mask Mask64x4) Int64x4

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, mask Mask64x4) Uint64x4

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2PD, CPU Feature: AVX512F
func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, mask Mask64x8) Float64x8

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, mask Mask64x8) Int64x8

// Permute2Masked performs a full permutation of vectors x and y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is the concatenation of x and y.
// Only the bits needed to represent an index into xy are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMI2Q, CPU Feature: AVX512F
func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Uint64x8

/* PermuteMasked */

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
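//
// For comparison with Permute2Masked above, a minimal scalar sketch of the
// single-vector permute, ignoring the mask (illustrative only; permuteRef is
// a hypothetical helper, not part of this package):
//
//	func permuteRef(x, indices [64]uint8) (r [64]uint8) {
//		for i := range r {
//			r[i] = x[indices[i]&63] // only the low 6 bits can index 64 lanes
//		}
//		return r
//	}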
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMW, CPU Feature: AVX512BW
func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMPS, CPU Feature: AVX512F
func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMD, CPU Feature: AVX512F
func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMD, CPU Feature: AVX512F
func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMPS, CPU Feature: AVX512F
func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMD, CPU Feature: AVX512F
func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMD, CPU Feature: AVX512F
func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMPD, CPU Feature: AVX512F
func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMQ, CPU Feature: AVX512F
func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMQ, CPU Feature: AVX512F
func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMPD, CPU Feature: AVX512F
func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMQ, CPU Feature: AVX512F
func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8

// PermuteMasked performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the bits needed to represent an index into x are used in each element of indices.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPERMQ, CPU Feature: AVX512F
func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8

/* PopCount */

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x16) PopCount() Int8x16

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x32) PopCount() Int8x32

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x64) PopCount() Int8x64

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x8) PopCount() Int16x8

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x16) PopCount() Int16x16

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x32) PopCount() Int16x32

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x4) PopCount() Int32x4

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x8) PopCount() Int32x8

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x16) PopCount() Int32x16

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x2) PopCount() Int64x2

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x4) PopCount() Int64x4

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x8) PopCount() Int64x8

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x16) PopCount() Uint8x16

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x32) PopCount() Uint8x32

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x64) PopCount() Uint8x64

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x8) PopCount() Uint16x8

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x16) PopCount() Uint16x16

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x32) PopCount() Uint16x32

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x4) PopCount() Uint32x4

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x8) PopCount() Uint32x8

// PopCount counts the number of set bits in each element.
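//
// Per element this matches math/bits.OnesCount32; a minimal scalar sketch of
// the vector semantics, assuming hypothetical [16]uint32 views src and dst of
// the input and result vectors (illustrative only):
//
//	for i, v := range src {
//		dst[i] = uint32(bits.OnesCount32(v)) // one count per lane
//	}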
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x16) PopCount() Uint32x16

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x2) PopCount() Uint64x2

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x4) PopCount() Uint64x4

// PopCount counts the number of set bits in each element.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x8) PopCount() Uint64x8

/* PopCountMasked */

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x16) PopCountMasked(mask Mask8x16) Int8x16

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x32) PopCountMasked(mask Mask8x32) Int8x32

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x64) PopCountMasked(mask Mask8x64) Int8x64

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x8) PopCountMasked(mask Mask16x8) Int16x8

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x16) PopCountMasked(mask Mask16x16) Int16x16

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x32) PopCountMasked(mask Mask16x32) Int16x32

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x4) PopCountMasked(mask Mask32x4) Int32x4

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x8) PopCountMasked(mask Mask32x8) Int32x8

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x16) PopCountMasked(mask Mask32x16) Int32x16

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x2) PopCountMasked(mask Mask64x2) Int64x2

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x4) PopCountMasked(mask Mask64x4) Int64x4

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x8) PopCountMasked(mask Mask64x8) Int64x8

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x16) PopCountMasked(mask Mask8x16) Uint8x16

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x32) PopCountMasked(mask Mask8x32) Uint8x32

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x64) PopCountMasked(mask Mask8x64) Uint8x64

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x8) PopCountMasked(mask Mask16x8) Uint16x8

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x16) PopCountMasked(mask Mask16x16) Uint16x16

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x32) PopCountMasked(mask Mask16x32) Uint16x32

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x4) PopCountMasked(mask Mask32x4) Uint32x4

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x8) PopCountMasked(mask Mask32x8) Uint32x8

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x16) PopCountMasked(mask Mask32x16) Uint32x16

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x2) PopCountMasked(mask Mask64x2) Uint64x2

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x4) PopCountMasked(mask Mask64x4) Uint64x4

// PopCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x8) PopCountMasked(mask Mask64x8) Uint64x8

/* RotateAllLeft */

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Int32x4) RotateAllLeft(shift uint8) Int32x4

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Int32x8) RotateAllLeft(shift uint8) Int32x8

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
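//
// Per lane the rotation matches math/bits.RotateLeft32 with a constant count;
// a minimal scalar sketch, assuming hypothetical [16]uint32 views src and dst
// of the input and result vectors (illustrative only; the bit pattern is the
// same for signed lanes):
//
//	for i, v := range src {
//		dst[i] = bits.RotateLeft32(v, 3) // lane-wise x.RotateAllLeft(3)
//	}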
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Int32x16) RotateAllLeft(shift uint8) Int32x16

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Int64x2) RotateAllLeft(shift uint8) Int64x2

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Int64x4) RotateAllLeft(shift uint8) Int64x4

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Int64x8) RotateAllLeft(shift uint8) Int64x8

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Uint32x4) RotateAllLeft(shift uint8) Uint32x4

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Uint32x8) RotateAllLeft(shift uint8) Uint32x8

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Uint32x16) RotateAllLeft(shift uint8) Uint32x16

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Uint64x2) RotateAllLeft(shift uint8) Uint64x2

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4

// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8

/* RotateAllLeftMasked */

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Int32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Int32x4

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Int32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Int32x8

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Int32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Int32x16

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Int64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Int64x2

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Int64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Int64x4

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Int64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Int64x8

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Uint32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Uint32x4

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Uint32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Uint32x8

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLD, CPU Feature: AVX512F
func (x Uint32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Uint32x16

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Uint64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Uint64x2

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Uint64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Uint64x4

// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
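//
// A minimal scalar sketch of the write-mask behavior, assuming a hypothetical
// per-lane predicate maskBit and that unselected lanes come out zeroed (the
// zeroing is an assumption of this sketch, not a documented guarantee):
//
//	for i := range dst {
//		if maskBit(i) {
//			dst[i] = bits.RotateLeft64(src[i], 7) // rotated where selected
//		} else {
//			dst[i] = 0 // assumed zeroed where not selected
//		}
//	}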
//
// Asm: VPROLQ, CPU Feature: AVX512F
func (x Uint64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Uint64x8

/* RotateAllRight */

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Int32x4) RotateAllRight(shift uint8) Int32x4

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Int32x8) RotateAllRight(shift uint8) Int32x8

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Int32x16) RotateAllRight(shift uint8) Int32x16

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Int64x2) RotateAllRight(shift uint8) Int64x2

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Int64x4) RotateAllRight(shift uint8) Int64x4

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Int64x8) RotateAllRight(shift uint8) Int64x8

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Uint32x4) RotateAllRight(shift uint8) Uint32x4

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Uint32x8) RotateAllRight(shift uint8) Uint32x8

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4

// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
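//
// A right rotation by shift is the same bit permutation as a left rotation by
// (lane width - shift); per 64-bit lane it matches math/bits.RotateLeft64 with
// a negated count (illustrative sketch only):
//
//	dst[i] = bits.RotateLeft64(src[i], -5) // lane-wise x.RotateAllRight(5)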
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8

/* RotateAllRightMasked */

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Int32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Int32x4

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Int32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Int32x8

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Int32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Int32x16

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Int64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Int64x2

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Int64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Int64x4

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Int64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Int64x8

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Uint32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Uint32x4

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Uint32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Uint32x8

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORD, CPU Feature: AVX512F
func (x Uint32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Uint32x16

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Uint64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Uint64x2

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Uint64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Uint64x4

// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPRORQ, CPU Feature: AVX512F
func (x Uint64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Uint64x8

/* RotateLeft */

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Int32x4) RotateLeft(y Int32x4) Int32x4

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Int32x8) RotateLeft(y Int32x8) Int32x8

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Int32x16) RotateLeft(y Int32x16) Int32x16

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Int64x2) RotateLeft(y Int64x2) Int64x2

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Int64x4) RotateLeft(y Int64x4) Int64x4

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Int64x8) RotateLeft(y Int64x8) Int64x8

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Uint32x4) RotateLeft(y Uint32x4) Uint32x4

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Uint32x8) RotateLeft(y Uint32x8) Uint32x8

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Uint32x16) RotateLeft(y Uint32x16) Uint32x16

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Uint64x2) RotateLeft(y Uint64x2) Uint64x2

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
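//
// Unlike RotateAllLeft, the rotate count here varies per lane; a minimal
// scalar sketch, assuming hypothetical [4]uint64 views xv, yv, and dst of x,
// y, and the result (illustrative only; counts act modulo the lane width):
//
//	for i := range dst {
//		dst[i] = bits.RotateLeft64(xv[i], int(yv[i]&63)) // count from y's lane
//	}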
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4

// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8

/* RotateLeftMasked */

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Int32x4) RotateLeftMasked(y Int32x4, mask Mask32x4) Int32x4

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Int32x8) RotateLeftMasked(y Int32x8, mask Mask32x8) Int32x8

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Int32x16) RotateLeftMasked(y Int32x16, mask Mask32x16) Int32x16

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Int64x2) RotateLeftMasked(y Int64x2, mask Mask64x2) Int64x2

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Int64x4) RotateLeftMasked(y Int64x4, mask Mask64x4) Int64x4

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Int64x8) RotateLeftMasked(y Int64x8, mask Mask64x8) Int64x8

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Uint32x4) RotateLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Uint32x8) RotateLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVD, CPU Feature: AVX512F
func (x Uint32x16) RotateLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Uint64x2) RotateLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Uint64x4) RotateLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4

// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPROLVQ, CPU Feature: AVX512F
func (x Uint64x8) RotateLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8

/* RotateRight */

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Int32x4) RotateRight(y Int32x4) Int32x4

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Int32x8) RotateRight(y Int32x8) Int32x8

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Int32x16) RotateRight(y Int32x16) Int32x16

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Int64x2) RotateRight(y Int64x2) Int64x2

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Int64x4) RotateRight(y Int64x4) Int64x4

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Int64x8) RotateRight(y Int64x8) Int64x8

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Uint32x4) RotateRight(y Uint32x4) Uint32x4

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Uint32x8) RotateRight(y Uint32x8) Uint32x8

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Uint32x16) RotateRight(y Uint32x16) Uint32x16

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Uint64x2) RotateRight(y Uint64x2) Uint64x2

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4

// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8

/* RotateRightMasked */

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Int32x4) RotateRightMasked(y Int32x4, mask Mask32x4) Int32x4

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Int32x8) RotateRightMasked(y Int32x8, mask Mask32x8) Int32x8

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Int32x16) RotateRightMasked(y Int32x16, mask Mask32x16) Int32x16

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Int64x2) RotateRightMasked(y Int64x2, mask Mask64x2) Int64x2

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Int64x4) RotateRightMasked(y Int64x4, mask Mask64x4) Int64x4

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Int64x8) RotateRightMasked(y Int64x8, mask Mask64x8) Int64x8

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Uint32x4) RotateRightMasked(y Uint32x4, mask Mask32x4) Uint32x4

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4

// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8

/* Round */

// Round rounds elements to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x4) Round() Float32x4

// Round rounds elements to the nearest integer.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x8) Round() Float32x8

// Round rounds elements to the nearest integer.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x2) Round() Float64x2

// Round rounds elements to the nearest integer.
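//
// Per element this is comparable to math.RoundToEven; this sketch assumes the
// round-to-nearest-even immediate, so ties-away rounding like math.Round would
// differ on values such as 0.5 (illustrative only):
//
//	dst[i] = math.RoundToEven(src[i]) // lane-wise x.Round()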
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x4) Round() Float64x4

/* RoundWithPrecision */

// RoundWithPrecision rounds elements with the specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x4) RoundWithPrecision(prec uint8) Float32x4

// RoundWithPrecision rounds elements with the specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x8) RoundWithPrecision(prec uint8) Float32x8

// RoundWithPrecision rounds elements with the specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x16) RoundWithPrecision(prec uint8) Float32x16

// RoundWithPrecision rounds elements with the specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x2) RoundWithPrecision(prec uint8) Float64x2

// RoundWithPrecision rounds elements with the specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x4) RoundWithPrecision(prec uint8) Float64x4

// RoundWithPrecision rounds elements with the specified precision.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x8) RoundWithPrecision(prec uint8) Float64x8

/* RoundWithPrecisionMasked */

// RoundWithPrecisionMasked rounds elements with the specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x4) RoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4

// RoundWithPrecisionMasked rounds elements with the specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x8) RoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8

// RoundWithPrecisionMasked rounds elements with the specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
func (x Float32x16) RoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16

// RoundWithPrecisionMasked rounds elements with the specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x2) RoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2

// RoundWithPrecisionMasked rounds elements with the specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x4) RoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4

// RoundWithPrecisionMasked rounds elements with the specified precision.
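//
// Here precision means the number of fraction bits kept: the result is x
// rounded to the nearest multiple of 2^-prec. A minimal scalar sketch,
// assuming round-to-nearest-even per lane (illustrative only):
//
//	scale := math.Ldexp(1, int(prec))               // 2^prec
//	dst[i] = math.RoundToEven(src[i]*scale) / scale // keep prec fraction bits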
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
func (x Float64x8) RoundWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8

/* SaturatedAdd */

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX
func (x Int8x16) SaturatedAdd(y Int8x16) Int8x16

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX2
func (x Int8x32) SaturatedAdd(y Int8x32) Int8x32

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSB, CPU Feature: AVX512BW
func (x Int8x64) SaturatedAdd(y Int8x64) Int8x64

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX
func (x Int16x8) SaturatedAdd(y Int16x8) Int16x8

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX2
func (x Int16x16) SaturatedAdd(y Int16x16) Int16x16

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDSW, CPU Feature: AVX512BW
func (x Int16x32) SaturatedAdd(y Int16x32) Int16x32

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSB, CPU Feature: AVX
func (x Uint8x16) SaturatedAdd(y Uint8x16) Uint8x16

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSB, CPU Feature: AVX2
func (x Uint8x32) SaturatedAdd(y Uint8x32) Uint8x32

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSB, CPU Feature: AVX512BW
func (x Uint8x64) SaturatedAdd(y Uint8x64) Uint8x64

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSW, CPU Feature: AVX
func (x Uint16x8) SaturatedAdd(y Uint16x8) Uint16x8

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSW, CPU Feature: AVX2
func (x Uint16x16) SaturatedAdd(y Uint16x16) Uint16x16

// SaturatedAdd adds corresponding elements of two vectors with saturation.
//
// Asm: VPADDUSW, CPU Feature: AVX512BW
func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32
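// The helper below is an illustrative sketch, not generated code; it uses
// only the SaturatedAdd declaration above. In a lane where a = 100 and
// b = 100, the wrapping Add would produce -56, while SaturatedAdd clamps
// the sum to 127.
func saturatingSumInt8(a, b Int8x16) Int8x16 {
	// Sums above 127 clamp to 127; sums below -128 clamp to -128.
	return a.SaturatedAdd(b)
}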
/* SaturatedAddMasked */

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDSB, CPU Feature: AVX512BW
func (x Int8x16) SaturatedAddMasked(y Int8x16, mask Mask8x16) Int8x16

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDSB, CPU Feature: AVX512BW
func (x Int8x32) SaturatedAddMasked(y Int8x32, mask Mask8x32) Int8x32

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDSB, CPU Feature: AVX512BW
func (x Int8x64) SaturatedAddMasked(y Int8x64, mask Mask8x64) Int8x64

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDSW, CPU Feature: AVX512BW
func (x Int16x8) SaturatedAddMasked(y Int16x8, mask Mask16x8) Int16x8

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDSW, CPU Feature: AVX512BW
func (x Int16x16) SaturatedAddMasked(y Int16x16, mask Mask16x16) Int16x16

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDSW, CPU Feature: AVX512BW
func (x Int16x32) SaturatedAddMasked(y Int16x32, mask Mask16x32) Int16x32

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDUSB, CPU Feature: AVX512BW
func (x Uint8x16) SaturatedAddMasked(y Uint8x16, mask Mask8x16) Uint8x16

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDUSB, CPU Feature: AVX512BW
func (x Uint8x32) SaturatedAddMasked(y Uint8x32, mask Mask8x32) Uint8x32

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDUSB, CPU Feature: AVX512BW
func (x Uint8x64) SaturatedAddMasked(y Uint8x64, mask Mask8x64) Uint8x64

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDUSW, CPU Feature: AVX512BW
func (x Uint16x8) SaturatedAddMasked(y Uint16x8, mask Mask16x8) Uint16x8

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDUSW, CPU Feature: AVX512BW
func (x Uint16x16) SaturatedAddMasked(y Uint16x16, mask Mask16x16) Uint16x16

// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPADDUSW, CPU Feature: AVX512BW
func (x Uint16x32) SaturatedAddMasked(y Uint16x32, mask Mask16x32) Uint16x32

/* SaturatedPairDotProdAccumulate */

// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
//
// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
func (x Int16x8) SaturatedPairDotProdAccumulate(y Int16x8, z Int32x4) Int32x4

// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
//
// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
func (x Int16x16) SaturatedPairDotProdAccumulate(y Int16x16, z Int32x8) Int32x8

// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
//
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
func (x Int16x32) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16

/* SaturatedPairDotProdAccumulateMasked */

// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
func (x Int16x8) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int32x4, mask Mask32x4) Int32x4

// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
func (x Int16x16) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int32x8, mask Mask32x8) Int32x8

// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
func (x Int16x32) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int32x16, mask Mask32x16) Int32x16
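// The two helpers below are illustrative sketches, not generated code; they
// assume only the declarations above. The first applies a saturating add
// under a write mask, taking the mask as a parameter so nothing is assumed
// about how masks are constructed. The second is one accumulation step of an
// int16 dot-product kernel.
func maskedSaturatingSum(a, b Int16x8, m Mask16x8) Int16x8 {
	// Lanes selected by m receive the saturated sum of a and b.
	return a.SaturatedAddMasked(b, m)
}

func pairDotStep(acc Int32x4, x, y Int16x8) Int32x4 {
	// Each int32 lane accumulates x[2i]*y[2i] + x[2i+1]*y[2i+1] with saturation.
	return x.SaturatedPairDotProdAccumulate(y, acc)
}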
/* SaturatedPairwiseAdd */

// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
// Asm: VPHADDSW, CPU Feature: AVX
func (x Int16x8) SaturatedPairwiseAdd(y Int16x8) Int16x8

// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
// Asm: VPHADDSW, CPU Feature: AVX2
func (x Int16x16) SaturatedPairwiseAdd(y Int16x16) Int16x16

/* SaturatedPairwiseSub */

// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
// Asm: VPHSUBSW, CPU Feature: AVX
func (x Int16x8) SaturatedPairwiseSub(y Int16x8) Int16x8

// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
// Asm: VPHSUBSW, CPU Feature: AVX2
func (x Int16x16) SaturatedPairwiseSub(y Int16x16) Int16x16

/* SaturatedSub */

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSB, CPU Feature: AVX
func (x Int8x16) SaturatedSub(y Int8x16) Int8x16

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSB, CPU Feature: AVX2
func (x Int8x32) SaturatedSub(y Int8x32) Int8x32

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSB, CPU Feature: AVX512BW
func (x Int8x64) SaturatedSub(y Int8x64) Int8x64

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSW, CPU Feature: AVX
func (x Int16x8) SaturatedSub(y Int16x8) Int16x8

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSW, CPU Feature: AVX2
func (x Int16x16) SaturatedSub(y Int16x16) Int16x16

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBSW, CPU Feature: AVX512BW
func (x Int16x32) SaturatedSub(y Int16x32) Int16x32

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBUSB, CPU Feature: AVX
func (x Uint8x16) SaturatedSub(y Uint8x16) Uint8x16

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBUSB, CPU Feature: AVX2
func (x Uint8x32) SaturatedSub(y Uint8x32) Uint8x32

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBUSB, CPU Feature: AVX512BW
func (x Uint8x64) SaturatedSub(y Uint8x64) Uint8x64

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBUSW, CPU Feature: AVX
func (x Uint16x8) SaturatedSub(y Uint16x8) Uint16x8

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBUSW, CPU Feature: AVX2
func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16

// SaturatedSub subtracts corresponding elements of two vectors with saturation.
//
// Asm: VPSUBUSW, CPU Feature: AVX512BW
func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32
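// An illustrative sketch, not generated code: a per-lane absolute difference
// of unsigned bytes built from the SaturatedSub declarations above. For uint8
// lanes, a.SaturatedSub(b) clamps negative differences to 0, so at most one
// of the two terms below is nonzero and the final Add cannot overflow.
func absDiffUint8(a, b Uint8x16) Uint8x16 {
	// Each lane computes max(a-b, 0) + max(b-a, 0) == |a - b|.
	return a.SaturatedSub(b).Add(b.SaturatedSub(a))
}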
/* SaturatedSubMasked */

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBSB, CPU Feature: AVX512BW
func (x Int8x16) SaturatedSubMasked(y Int8x16, mask Mask8x16) Int8x16

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBSB, CPU Feature: AVX512BW
func (x Int8x32) SaturatedSubMasked(y Int8x32, mask Mask8x32) Int8x32

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBSB, CPU Feature: AVX512BW
func (x Int8x64) SaturatedSubMasked(y Int8x64, mask Mask8x64) Int8x64

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBSW, CPU Feature: AVX512BW
func (x Int16x8) SaturatedSubMasked(y Int16x8, mask Mask16x8) Int16x8

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBSW, CPU Feature: AVX512BW
func (x Int16x16) SaturatedSubMasked(y Int16x16, mask Mask16x16) Int16x16

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBSW, CPU Feature: AVX512BW
func (x Int16x32) SaturatedSubMasked(y Int16x32, mask Mask16x32) Int16x32

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBUSB, CPU Feature: AVX512BW
func (x Uint8x16) SaturatedSubMasked(y Uint8x16, mask Mask8x16) Uint8x16

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBUSB, CPU Feature: AVX512BW
func (x Uint8x32) SaturatedSubMasked(y Uint8x32, mask Mask8x32) Uint8x32

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBUSB, CPU Feature: AVX512BW
func (x Uint8x64) SaturatedSubMasked(y Uint8x64, mask Mask8x64) Uint8x64

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBUSW, CPU Feature: AVX512BW
func (x Uint16x8) SaturatedSubMasked(y Uint16x8, mask Mask16x8) Uint16x8

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBUSW, CPU Feature: AVX512BW
func (x Uint16x16) SaturatedSubMasked(y Uint16x16, mask Mask16x16) Uint16x16

// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSUBUSW, CPU Feature: AVX512BW
func (x Uint16x32) SaturatedSubMasked(y Uint16x32, mask Mask16x32) Uint16x32

/* SaturatedUnsignedSignedPairDotProd */

// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX
func (x Uint8x16) SaturatedUnsignedSignedPairDotProd(y Int8x16) Int16x8

// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX2
func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16

// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512BW
func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32
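// An illustrative sketch, not generated code: the mixed-signedness pair dot
// product declared above multiplies each uint8 lane of w with the
// corresponding int8 lane of q and sums adjacent products into int16 lanes
// with saturation, halving the lane count.
func mixedPairDot(w Uint8x16, q Int8x16) Int16x8 {
	// Result lane i is w[2i]*q[2i] + w[2i+1]*q[2i+1], clamped to the int16 range.
	return w.SaturatedUnsignedSignedPairDotProd(q)
}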
/* SaturatedUnsignedSignedPairDotProdMasked */

// SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512BW
func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, mask Mask16x8) Int16x8

// SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512BW
func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, mask Mask16x16) Int16x16

// SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512BW
func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, mask Mask16x32) Int16x32

/* SaturatedUnsignedSignedQuadDotProdAccumulate */

// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
//
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4) Int32x4

// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
//
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8) Int32x8

// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16

/* SaturatedUnsignedSignedQuadDotProdAccumulateMasked */

// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4

// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8

// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
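// An illustrative sketch, not generated code: one accumulation step of the
// kind used in int8 matrix-multiply kernels, built from the quad dot product
// declared above. Groups of four adjacent byte products are summed into each
// int32 lane of the accumulator with saturation.
func quadDotStep(acc Int32x4, x Int8x16, y Uint8x16) Int32x4 {
	// acc[i] accumulates the dot product of x[4i..4i+3] and y[4i..4i+3].
	return x.SaturatedUnsignedSignedQuadDotProdAccumulate(y, acc)
}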
/* Set128 */

// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VINSERTF128, CPU Feature: AVX
func (x Float32x8) Set128(index uint8, y Float32x4) Float32x8

// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VINSERTF128, CPU Feature: AVX
func (x Float64x4) Set128(index uint8, y Float64x2) Float64x4

// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int8x32) Set128(index uint8, y Int8x16) Int8x32

// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int16x16) Set128(index uint8, y Int16x8) Int16x16

// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int32x8) Set128(index uint8, y Int32x4) Int32x8

// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Int64x4) Set128(index uint8, y Int64x2) Int64x4

// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint8x32) Set128(index uint8, y Uint8x16) Uint8x32

// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint16x16) Set128(index uint8, y Uint16x8) Uint16x16

// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint32x8) Set128(index uint8, y Uint32x4) Uint32x8

// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VINSERTI128, CPU Feature: AVX2
func (x Uint64x4) Set128(index uint8, y Uint64x2) Uint64x4
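// An illustrative sketch, not generated code: assembling a 256-bit vector
// from two 128-bit halves with the Set128 declarations above. The index
// arguments are constants, as required; 0 targets the low half and 1 the
// high half.
func join128(lo, hi Int32x4, dst Int32x8) Int32x8 {
	// Insert lo into the low 128 bits of dst, then hi into the high 128 bits.
	return dst.Set128(0, lo).Set128(1, hi)
}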
/* SetElem */

// SetElem sets a single constant-indexed element's value.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VPINSRB, CPU Feature: AVX
func (x Int8x16) SetElem(index uint8, y int8) Int8x16

// SetElem sets a single constant-indexed element's value.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VPINSRW, CPU Feature: AVX
func (x Int16x8) SetElem(index uint8, y int16) Int16x8

// SetElem sets a single constant-indexed element's value.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VPINSRD, CPU Feature: AVX
func (x Int32x4) SetElem(index uint8, y int32) Int32x4

// SetElem sets a single constant-indexed element's value.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VPINSRQ, CPU Feature: AVX
func (x Int64x2) SetElem(index uint8, y int64) Int64x2

// SetElem sets a single constant-indexed element's value.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VPINSRB, CPU Feature: AVX
func (x Uint8x16) SetElem(index uint8, y uint8) Uint8x16

// SetElem sets a single constant-indexed element's value.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VPINSRW, CPU Feature: AVX
func (x Uint16x8) SetElem(index uint8, y uint16) Uint16x8

// SetElem sets a single constant-indexed element's value.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VPINSRD, CPU Feature: AVX
func (x Uint32x4) SetElem(index uint8, y uint32) Uint32x4

// SetElem sets a single constant-indexed element's value.
//
// index is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VPINSRQ, CPU Feature: AVX
func (x Uint64x2) SetElem(index uint8, y uint64) Uint64x2

/* ShiftAllLeft */

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX
func (x Int16x8) ShiftAllLeft(y uint64) Int16x8

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX2
func (x Int16x16) ShiftAllLeft(y uint64) Int16x16

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
//
// Asm: VPSLLW, CPU Feature: AVX512BW
func (x Int16x32) ShiftAllLeft(y uint64) Int16x32

// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
// // Asm: VPSLLD, CPU Feature: AVX func (x Int32x4) ShiftAllLeft(y uint64) Int32x4 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX2 func (x Int32x8) ShiftAllLeft(y uint64) Int32x8 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX512F func (x Int32x16) ShiftAllLeft(y uint64) Int32x16 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX func (x Int64x2) ShiftAllLeft(y uint64) Int64x2 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX2 func (x Int64x4) ShiftAllLeft(y uint64) Int64x4 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512F func (x Int64x8) ShiftAllLeft(y uint64) Int64x8 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLW, CPU Feature: AVX func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLW, CPU Feature: AVX2 func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLW, CPU Feature: AVX512BW func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX2 func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX512F func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX func (x Uint64x2) ShiftAllLeft(y uint64) Uint64x2 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX2 func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512F func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8 /* ShiftAllLeftConcat */ // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Int16x8) ShiftAllLeftConcat(shift uint8, y Int16x8) Int16x8 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Int16x16) ShiftAllLeftConcat(shift uint8, y Int16x16) Int16x16 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Int16x32) ShiftAllLeftConcat(shift uint8, y Int16x32) Int16x32 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Int32x4) ShiftAllLeftConcat(shift uint8, y Int32x4) Int32x4 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Int32x8) ShiftAllLeftConcat(shift uint8, y Int32x8) Int32x8 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Int32x16) ShiftAllLeftConcat(shift uint8, y Int32x16) Int32x16 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Int64x2) ShiftAllLeftConcat(shift uint8, y Int64x2) Int64x2 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Int64x4) ShiftAllLeftConcat(shift uint8, y Int64x4) Int64x4 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Int64x8) ShiftAllLeftConcat(shift uint8, y Int64x8) Int64x8 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Uint16x8) ShiftAllLeftConcat(shift uint8, y Uint16x8) Uint16x8 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Uint16x16) ShiftAllLeftConcat(shift uint8, y Uint16x16) Uint16x16 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Uint16x32) ShiftAllLeftConcat(shift uint8, y Uint16x32) Uint16x32 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Uint32x4) ShiftAllLeftConcat(shift uint8, y Uint32x4) Uint32x4 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Uint32x8) ShiftAllLeftConcat(shift uint8, y Uint32x8) Uint32x8 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Uint32x16) ShiftAllLeftConcat(shift uint8, y Uint32x16) Uint32x16 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Uint64x2) ShiftAllLeftConcat(shift uint8, y Uint64x2) Uint64x2 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8 /* ShiftAllLeftConcatMasked */ // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Int16x8) ShiftAllLeftConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Int16x16) ShiftAllLeftConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Int16x32) ShiftAllLeftConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Int32x4) ShiftAllLeftConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Int32x8) ShiftAllLeftConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. 
// // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Int32x16) ShiftAllLeftConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Int64x2) ShiftAllLeftConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Int64x4) ShiftAllLeftConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Int64x8) ShiftAllLeftConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Uint16x8) ShiftAllLeftConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Uint16x16) ShiftAllLeftConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Uint16x32) ShiftAllLeftConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Uint32x4) ShiftAllLeftConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Uint32x8) ShiftAllLeftConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Uint32x16) ShiftAllLeftConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Uint64x2) ShiftAllLeftConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Uint64x4) ShiftAllLeftConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 // ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftAllLeftConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 /* ShiftAllLeftMasked */ // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. 
// // Asm: VPSLLW, CPU Feature: AVX512BW func (x Int16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Int16x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLW, CPU Feature: AVX512BW func (x Int16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Int16x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLW, CPU Feature: AVX512BW func (x Int16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Int16x32 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLD, CPU Feature: AVX512F func (x Int32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Int32x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLD, CPU Feature: AVX512F func (x Int32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Int32x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLD, CPU Feature: AVX512F func (x Int32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Int32x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLQ, CPU Feature: AVX512F func (x Int64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Int64x2 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLQ, CPU Feature: AVX512F func (x Int64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Int64x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLQ, CPU Feature: AVX512F func (x Int64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Int64x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLW, CPU Feature: AVX512BW func (x Uint16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Uint16x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLW, CPU Feature: AVX512BW func (x Uint16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Uint16x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLW, CPU Feature: AVX512BW func (x Uint16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Uint16x32 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. 
// // Asm: VPSLLD, CPU Feature: AVX512F func (x Uint32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Uint32x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLD, CPU Feature: AVX512F func (x Uint32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Uint32x8 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLD, CPU Feature: AVX512F func (x Uint32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Uint32x16 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLQ, CPU Feature: AVX512F func (x Uint64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Uint64x2 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLQ, CPU Feature: AVX512F func (x Uint64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Uint64x4 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSLLQ, CPU Feature: AVX512F func (x Uint64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Uint64x8 /* ShiftAllRight */ // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAW, CPU Feature: AVX func (x Int16x8) ShiftAllRight(y uint64) Int16x8 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAW, CPU Feature: AVX2 func (x Int16x16) ShiftAllRight(y uint64) Int16x16 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAW, CPU Feature: AVX512BW func (x Int16x32) ShiftAllRight(y uint64) Int16x32 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAD, CPU Feature: AVX func (x Int32x4) ShiftAllRight(y uint64) Int32x4 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAD, CPU Feature: AVX2 func (x Int32x8) ShiftAllRight(y uint64) Int32x8 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAD, CPU Feature: AVX512F func (x Int32x16) ShiftAllRight(y uint64) Int32x16 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAQ, CPU Feature: AVX512F func (x Int64x2) ShiftAllRight(y uint64) Int64x2 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAQ, CPU Feature: AVX512F func (x Int64x4) ShiftAllRight(y uint64) Int64x4 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
// // Asm: VPSRAQ, CPU Feature: AVX512F func (x Int64x8) ShiftAllRight(y uint64) Int64x8 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLW, CPU Feature: AVX func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLW, CPU Feature: AVX2 func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLW, CPU Feature: AVX512BW func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLD, CPU Feature: AVX func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLD, CPU Feature: AVX2 func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLD, CPU Feature: AVX512F func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLQ, CPU Feature: AVX func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLQ, CPU Feature: AVX2 func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLQ, CPU Feature: AVX512F func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8 /* ShiftAllRightConcat */ // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 func (x Int16x8) ShiftAllRightConcat(shift uint8, y Int16x8) Int16x8 // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 func (x Int16x16) ShiftAllRightConcat(shift uint8, y Int16x16) Int16x16 // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 func (x Int16x32) ShiftAllRightConcat(shift uint8, y Int16x32) Int16x32 // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 

/* ShiftAllRightConcat */

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftAllRightConcat(shift uint8, y Int16x8) Int16x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftAllRightConcat(shift uint8, y Int16x16) Int16x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftAllRightConcat(shift uint8, y Int16x32) Int16x32

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftAllRightConcat(shift uint8, y Int32x4) Int32x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftAllRightConcat(shift uint8, y Int32x8) Int32x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftAllRightConcat(shift uint8, y Int32x16) Int32x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftAllRightConcat(shift uint8, y Int64x2) Int64x2

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftAllRightConcat(shift uint8, y Int64x4) Int64x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftAllRightConcat(shift uint8, y Int64x8) Int64x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftAllRightConcat(shift uint8, y Uint16x8) Uint16x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftAllRightConcat(shift uint8, y Uint16x16) Uint16x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftAllRightConcat(shift uint8, y Uint16x32) Uint16x32

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4

// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8
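
// rotateLanesRight7 is an illustrative sketch, not generated code: because
// ShiftAllRightConcat funnels y's low bits into the top of the shifted x,
// passing the same vector for both operands rotates each lane. The helper
// name and the count 7 are invented for illustration; AVX512VBMI2 support at
// run time is assumed.
func rotateLanesRight7(v Uint32x4) Uint32x4 {
	// Each 32-bit lane becomes (lane >> 7) | (lane << 25), a rotate right
	// by 7, since the lane's own low bits refill the emptied upper bits.
	return v.ShiftAllRightConcat(7, v)
}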

/* ShiftAllRightConcatMasked */

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftAllRightConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftAllRightConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftAllRightConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftAllRightConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftAllRightConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftAllRightConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftAllRightConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftAllRightConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftAllRightConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftAllRightConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftAllRightConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftAllRightConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftAllRightConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftAllRightConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftAllRightConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftAllRightConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftAllRightConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4

// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// shift is expected to be a constant; a non-constant value will trigger a runtime panic.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftAllRightConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8
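
// rotateSelectedLanes is an illustrative sketch, not generated code: the
// masked form performs the same funnel shift but applies it only in the
// lanes selected by the write mask. The helper name and the count 9 are
// invented; how the mask is produced is left to the caller.
func rotateSelectedLanes(v Uint64x4, m Mask64x4) Uint64x4 {
	// Rotate each selected 64-bit lane right by 9 bits by funneling the
	// lane's own low bits back into the emptied upper bits.
	return v.ShiftAllRightConcatMasked(9, v, m)
}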

/* ShiftAllRightMasked */

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRAW, CPU Feature: AVX512BW
func (x Int16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Int16x8

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRAW, CPU Feature: AVX512BW
func (x Int16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Int16x16

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRAW, CPU Feature: AVX512BW
func (x Int16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Int16x32

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRAD, CPU Feature: AVX512F
func (x Int32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Int32x4

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRAD, CPU Feature: AVX512F
func (x Int32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Int32x8

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRAD, CPU Feature: AVX512F
func (x Int32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Int32x16

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRAQ, CPU Feature: AVX512F
func (x Int64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Int64x2

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRAQ, CPU Feature: AVX512F
func (x Int64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Int64x4

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRAQ, CPU Feature: AVX512F
func (x Int64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Int64x8

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRLW, CPU Feature: AVX512BW
func (x Uint16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Uint16x8

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRLW, CPU Feature: AVX512BW
func (x Uint16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Uint16x16

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRLW, CPU Feature: AVX512BW
func (x Uint16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Uint16x32

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRLD, CPU Feature: AVX512F
func (x Uint32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Uint32x4

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRLD, CPU Feature: AVX512F
func (x Uint32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Uint32x8

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRLD, CPU Feature: AVX512F
func (x Uint32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Uint32x16

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRLQ, CPU Feature: AVX512F
func (x Uint64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Uint64x2

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRLQ, CPU Feature: AVX512F
func (x Uint64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Uint64x4

// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSRLQ, CPU Feature: AVX512F
func (x Uint64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Uint64x8
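
// halveSelected is an illustrative sketch, not generated code: an arithmetic
// shift under a write mask, useful when only some lanes should be scaled
// down. The helper name is invented; AVX512F support at run time is assumed.
func halveSelected(v Int32x16, m Mask32x16) Int32x16 {
	// Shift the selected lanes right by 1; the sign bit is replicated, so
	// negative lanes stay negative (rounding toward negative infinity).
	return v.ShiftAllRightMasked(1, m)
}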

/* ShiftLeft */

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Int16x8) ShiftLeft(y Int16x8) Int16x8

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Int16x16) ShiftLeft(y Int16x16) Int16x16

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Int16x32) ShiftLeft(y Int16x32) Int16x32

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Int32x4) ShiftLeft(y Int32x4) Int32x4

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Int32x8) ShiftLeft(y Int32x8) Int32x8

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Int32x16) ShiftLeft(y Int32x16) Int32x16

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Int64x2) ShiftLeft(y Int64x2) Int64x2

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Int64x4) ShiftLeft(y Int64x4) Int64x4

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Int64x8) ShiftLeft(y Int64x8) Int64x8

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Uint16x8) ShiftLeft(y Uint16x8) Uint16x8

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Uint16x16) ShiftLeft(y Uint16x16) Uint16x16

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Uint16x32) ShiftLeft(y Uint16x32) Uint16x32

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Uint32x4) ShiftLeft(y Uint32x4) Uint32x4

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Uint32x8) ShiftLeft(y Uint32x8) Uint32x8

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Uint32x16) ShiftLeft(y Uint32x16) Uint32x16

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Uint64x2) ShiftLeft(y Uint64x2) Uint64x2

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Uint64x4) ShiftLeft(y Uint64x4) Uint64x4

// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8
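
// scaleByLane is an illustrative sketch, not generated code: here the count
// is a vector applied lane by lane rather than a single scalar. The helper
// name is invented; AVX2 support at run time is assumed for this shape.
func scaleByLane(x Uint32x8, counts Uint32x8) Uint32x8 {
	// Lane i becomes x[i] << counts[i], i.e. x[i] * 2^counts[i]; the
	// emptied lower bits are zeroed.
	return x.ShiftLeft(counts)
}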

/* ShiftLeftConcat */

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftLeftConcat(y Int16x8, z Int16x8) Int16x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftLeftConcat(y Int16x16, z Int16x16) Int16x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftLeftConcat(y Int16x32, z Int16x32) Int16x32

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftLeftConcat(y Int32x4, z Int32x4) Int32x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftLeftConcat(y Int32x8, z Int32x8) Int32x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftLeftConcat(y Int32x16, z Int32x16) Int32x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftLeftConcat(y Int64x2, z Int64x2) Int64x2

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftLeftConcat(y Int64x4, z Int64x4) Int64x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftLeftConcat(y Int64x8, z Int64x8) Int64x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftLeftConcat(y Uint16x8, z Uint16x8) Uint16x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftLeftConcat(y Uint16x16, z Uint16x16) Uint16x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftLeftConcat(y Uint16x32, z Uint16x32) Uint16x32

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4

// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8
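
// rotateLeftByLane is an illustrative sketch, not generated code: a variable
// funnel shift left in which passing x as z as well makes each lane rotate
// left by its own count. The helper name is invented; AVX512VBMI2 support at
// run time is assumed.
func rotateLeftByLane(v Uint32x8, counts Uint32x8) Uint32x8 {
	// Lane i becomes (v[i] << n) | (v[i] >> (32-n)) with n = counts[i],
	// since v's own upper bits are funneled back in from below.
	return v.ShiftLeftConcat(counts, v)
}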

/* ShiftLeftConcatMasked */

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftLeftConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftLeftConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftLeftConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftLeftConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftLeftConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftLeftConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftLeftConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftLeftConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftLeftConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftLeftConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftLeftConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftLeftConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftLeftConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftLeftConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftLeftConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftLeftConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftLeftConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4

// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftLeftConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8
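
// rotateLeftWhere is an illustrative sketch, not generated code: the masked
// variant of the per-lane funnel shift left, writing only the lanes selected
// by the mask. The helper name is invented; the mask is left to the caller.
func rotateLeftWhere(v Uint16x16, counts Uint16x16, m Mask16x16) Uint16x16 {
	// Per-lane rotate left (v funneled into itself), applied only in the
	// lanes selected by m.
	return v.ShiftLeftConcatMasked(counts, v, m)
}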

/* ShiftLeftMasked */

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Int16x8) ShiftLeftMasked(y Int16x8, mask Mask16x8) Int16x8

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Int16x16) ShiftLeftMasked(y Int16x16, mask Mask16x16) Int16x16

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Int16x32) ShiftLeftMasked(y Int16x32, mask Mask16x32) Int16x32

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Int32x4) ShiftLeftMasked(y Int32x4, mask Mask32x4) Int32x4

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Int32x8) ShiftLeftMasked(y Int32x8, mask Mask32x8) Int32x8

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Int32x16) ShiftLeftMasked(y Int32x16, mask Mask32x16) Int32x16

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Int64x2) ShiftLeftMasked(y Int64x2, mask Mask64x2) Int64x2

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Int64x4) ShiftLeftMasked(y Int64x4, mask Mask64x4) Int64x4

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Int64x8) ShiftLeftMasked(y Int64x8, mask Mask64x8) Int64x8

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Uint16x8) ShiftLeftMasked(y Uint16x8, mask Mask16x8) Uint16x8

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Uint16x16) ShiftLeftMasked(y Uint16x16, mask Mask16x16) Uint16x16

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVW, CPU Feature: AVX512BW
func (x Uint16x32) ShiftLeftMasked(y Uint16x32, mask Mask16x32) Uint16x32

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Uint32x4) ShiftLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Uint32x8) ShiftLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVD, CPU Feature: AVX512F
func (x Uint32x16) ShiftLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Uint64x2) ShiftLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Uint64x4) ShiftLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4

// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPSLLVQ, CPU Feature: AVX512F
func (x Uint64x8) ShiftLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8
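
// scaleSelected is an illustrative sketch, not generated code: a per-lane
// left shift performed only in masked lanes. The helper name is invented;
// AVX512F support at run time is assumed.
func scaleSelected(x Uint64x8, counts Uint64x8, m Mask64x8) Uint64x8 {
	// Shift each selected lane left by its own count; the emptied lower
	// bits are zeroed, and unselected lanes follow the write-mask rules.
	return x.ShiftLeftMasked(counts, m)
}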

/* ShiftRight */

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512BW
func (x Int16x8) ShiftRight(y Int16x8) Int16x8

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512BW
func (x Int16x16) ShiftRight(y Int16x16) Int16x16

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVW, CPU Feature: AVX512BW
func (x Int16x32) ShiftRight(y Int16x32) Int16x32

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX2
func (x Int32x4) ShiftRight(y Int32x4) Int32x4

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX2
func (x Int32x8) ShiftRight(y Int32x8) Int32x8

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVD, CPU Feature: AVX512F
func (x Int32x16) ShiftRight(y Int32x16) Int32x16

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512F
func (x Int64x2) ShiftRight(y Int64x2) Int64x2

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512F
func (x Int64x4) ShiftRight(y Int64x4) Int64x4

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
//
// Asm: VPSRAVQ, CPU Feature: AVX512F
func (x Int64x8) ShiftRight(y Int64x8) Int64x8

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512BW
func (x Uint16x8) ShiftRight(y Uint16x8) Uint16x8

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512BW
func (x Uint16x16) ShiftRight(y Uint16x16) Uint16x16

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVW, CPU Feature: AVX512BW
func (x Uint16x32) ShiftRight(y Uint16x32) Uint16x32

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX2
func (x Uint32x4) ShiftRight(y Uint32x4) Uint32x4

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX2
func (x Uint32x8) ShiftRight(y Uint32x8) Uint32x8

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVD, CPU Feature: AVX512F
func (x Uint32x16) ShiftRight(y Uint32x16) Uint32x16

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX2
func (x Uint64x2) ShiftRight(y Uint64x2) Uint64x2

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX2
func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4

// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
//
// Asm: VPSRLVQ, CPU Feature: AVX512F
func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8
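
// signedScaleDown is an illustrative sketch, not generated code: for signed
// vectors ShiftRight is an arithmetic shift, so it preserves sign while
// scaling lanes down. The helper name is invented; AVX2 support at run time
// is assumed for this shape.
func signedScaleDown(x Int32x8, counts Int32x8) Int32x8 {
	// Lane i becomes x[i] >> counts[i]; the emptied upper bits replicate
	// the sign bit, keeping negative lanes negative.
	return x.ShiftRight(counts)
}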

/* ShiftRightConcat */

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftRightConcat(y Int16x8, z Int16x8) Int16x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftRightConcat(y Int16x16, z Int16x16) Int16x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftRightConcat(y Int16x32, z Int16x32) Int16x32

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftRightConcat(y Int32x4, z Int32x4) Int32x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftRightConcat(y Int32x8, z Int32x8) Int32x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftRightConcat(y Int32x16, z Int32x16) Int32x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftRightConcat(y Int64x2, z Int64x2) Int64x2

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4

// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8
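
// rotateRightByLane is an illustrative sketch, not generated code: a variable
// funnel shift right in which passing v as both operands rotates each lane
// right by its own count. The helper name is invented; AVX512VBMI2 support at
// run time is assumed.
func rotateRightByLane(v Uint64x2, counts Uint64x2) Uint64x2 {
	// Lane i becomes (v[i] >> n) | (v[i] << (64-n)) with n = counts[i],
	// since v's own low bits refill the emptied upper bits.
	return v.ShiftRightConcat(counts, v)
}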
// // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Int32x8) ShiftRightConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Int32x16) ShiftRightConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Int64x2) ShiftRightConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Int64x4) ShiftRightConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Int64x8) ShiftRightConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 func (x Uint16x8) ShiftRightConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 func (x Uint16x16) ShiftRightConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 func (x Uint16x32) ShiftRightConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
// // This operation is applied selectively under a write mask. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Uint32x4) ShiftRightConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Uint32x8) ShiftRightConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Uint32x16) ShiftRightConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x2) ShiftRightConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x4) ShiftRightConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 // ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftRightConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 /* ShiftRightMasked */ // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // This operation is applied selectively under a write mask. // // Asm: VPSRAVW, CPU Feature: AVX512BW func (x Int16x8) ShiftRightMasked(y Int16x8, mask Mask16x8) Int16x8 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // This operation is applied selectively under a write mask. // // Asm: VPSRAVW, CPU Feature: AVX512BW func (x Int16x16) ShiftRightMasked(y Int16x16, mask Mask16x16) Int16x16 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // This operation is applied selectively under a write mask.
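// Editorial sketch (not generated): the masked variant only applies the
// funnel shift in lanes selected by the write mask; the mask is taken as a
// parameter here, and the helper name is ours.
func funnelShiftRight64Masked(x, y, z Uint64x4, m Mask64x4) Uint64x4 {
	// Only lanes selected by m receive the shifted result.
	return x.ShiftRightConcatMasked(y, z, m)
}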
// // Asm: VPSRAVW, CPU Feature: AVX512BW func (x Int16x32) ShiftRightMasked(y Int16x32, mask Mask16x32) Int16x32 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // This operation is applied selectively under a write mask. // // Asm: VPSRAVD, CPU Feature: AVX512F func (x Int32x4) ShiftRightMasked(y Int32x4, mask Mask32x4) Int32x4 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // This operation is applied selectively under a write mask. // // Asm: VPSRAVD, CPU Feature: AVX512F func (x Int32x8) ShiftRightMasked(y Int32x8, mask Mask32x8) Int32x8 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // This operation is applied selectively under a write mask. // // Asm: VPSRAVD, CPU Feature: AVX512F func (x Int32x16) ShiftRightMasked(y Int32x16, mask Mask32x16) Int32x16 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // This operation is applied selectively under a write mask. // // Asm: VPSRAVQ, CPU Feature: AVX512F func (x Int64x2) ShiftRightMasked(y Int64x2, mask Mask64x2) Int64x2 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // This operation is applied selectively under a write mask. // // Asm: VPSRAVQ, CPU Feature: AVX512F func (x Int64x4) ShiftRightMasked(y Int64x4, mask Mask64x4) Int64x4 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // This operation is applied selectively under a write mask. // // Asm: VPSRAVQ, CPU Feature: AVX512F func (x Int64x8) ShiftRightMasked(y Int64x8, mask Mask64x8) Int64x8 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSRLVW, CPU Feature: AVX512BW func (x Uint16x8) ShiftRightMasked(y Uint16x8, mask Mask16x8) Uint16x8 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSRLVW, CPU Feature: AVX512BW func (x Uint16x16) ShiftRightMasked(y Uint16x16, mask Mask16x16) Uint16x16 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSRLVW, CPU Feature: AVX512BW func (x Uint16x32) ShiftRightMasked(y Uint16x32, mask Mask16x32) Uint16x32 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. // // This operation is applied selectively under a write mask. 
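// Editorial sketch (not generated): on signed element types ShiftRightMasked
// is an arithmetic shift, so negative lanes stay negative. The helper name is
// ours.
func arithShiftRightMasked(x, counts Int32x4, m Mask32x4) Int32x4 {
	// Each selected lane of x is shifted right by its lane in counts,
	// with the emptied upper bits filled with the sign bit.
	return x.ShiftRightMasked(counts, m)
}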
// // Asm: VPSRLVD, CPU Feature: AVX512F func (x Uint32x4) ShiftRightMasked(y Uint32x4, mask Mask32x4) Uint32x4 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSRLVD, CPU Feature: AVX512F func (x Uint32x8) ShiftRightMasked(y Uint32x8, mask Mask32x8) Uint32x8 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSRLVD, CPU Feature: AVX512F func (x Uint32x16) ShiftRightMasked(y Uint32x16, mask Mask32x16) Uint32x16 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSRLVQ, CPU Feature: AVX512F func (x Uint64x2) ShiftRightMasked(y Uint64x2, mask Mask64x2) Uint64x2 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSRLVQ, CPU Feature: AVX512F func (x Uint64x4) ShiftRightMasked(y Uint64x4, mask Mask64x4) Uint64x4 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. // // This operation is applied selectively under a write mask. // // Asm: VPSRLVQ, CPU Feature: AVX512F func (x Uint64x8) ShiftRightMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* Sign */ // Sign returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. // // Asm: VPSIGNB, CPU Feature: AVX func (x Int8x16) Sign(y Int8x16) Int8x16 // Sign returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. // // Asm: VPSIGNB, CPU Feature: AVX2 func (x Int8x32) Sign(y Int8x32) Int8x32 // Sign returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. // // Asm: VPSIGNW, CPU Feature: AVX func (x Int16x8) Sign(y Int16x8) Int16x8 // Sign returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. // // Asm: VPSIGNW, CPU Feature: AVX2 func (x Int16x16) Sign(y Int16x16) Int16x16 // Sign returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. // // Asm: VPSIGND, CPU Feature: AVX func (x Int32x4) Sign(y Int32x4) Int32x4 // Sign returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. // // Asm: VPSIGND, CPU Feature: AVX2 func (x Int32x8) Sign(y Int32x8) Int32x8 /* Sqrt */ // Sqrt computes the square root of each element. // // Asm: VSQRTPS, CPU Feature: AVX func (x Float32x4) Sqrt() Float32x4 // Sqrt computes the square root of each element. // // Asm: VSQRTPS, CPU Feature: AVX func (x Float32x8) Sqrt() Float32x8 // Sqrt computes the square root of each element. // // Asm: VSQRTPS, CPU Feature: AVX512F func (x Float32x16) Sqrt() Float32x16 // Sqrt computes the square root of each element. 
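// Editorial sketch (not generated): Sign multiplies each lane of x by -1, 0,
// or +1 according to the sign of the corresponding lane of y, which makes it
// a one-instruction conditional negate. The helper name is ours.
func conditionalNegate(x, y Int32x4) Int32x4 {
	// Lanes of x are negated where y is negative, zeroed where y is zero,
	// and passed through where y is positive.
	return x.Sign(y)
}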
// // Asm: VSQRTPD, CPU Feature: AVX func (x Float64x2) Sqrt() Float64x2 // Sqrt computes the square root of each element. // // Asm: VSQRTPD, CPU Feature: AVX func (x Float64x4) Sqrt() Float64x4 // Sqrt computes the square root of each element. // // Asm: VSQRTPD, CPU Feature: AVX512F func (x Float64x8) Sqrt() Float64x8 /* SqrtMasked */ // SqrtMasked computes the square root of each element. // // This operation is applied selectively under a write mask. // // Asm: VSQRTPS, CPU Feature: AVX512F func (x Float32x4) SqrtMasked(mask Mask32x4) Float32x4 // SqrtMasked computes the square root of each element. // // This operation is applied selectively under a write mask. // // Asm: VSQRTPS, CPU Feature: AVX512F func (x Float32x8) SqrtMasked(mask Mask32x8) Float32x8 // SqrtMasked computes the square root of each element. // // This operation is applied selectively under a write mask. // // Asm: VSQRTPS, CPU Feature: AVX512F func (x Float32x16) SqrtMasked(mask Mask32x16) Float32x16 // SqrtMasked computes the square root of each element. // // This operation is applied selectively under a write mask. // // Asm: VSQRTPD, CPU Feature: AVX512F func (x Float64x2) SqrtMasked(mask Mask64x2) Float64x2 // SqrtMasked computes the square root of each element. // // This operation is applied selectively under a write mask. // // Asm: VSQRTPD, CPU Feature: AVX512F func (x Float64x4) SqrtMasked(mask Mask64x4) Float64x4 // SqrtMasked computes the square root of each element. // // This operation is applied selectively under a write mask. // // Asm: VSQRTPD, CPU Feature: AVX512F func (x Float64x8) SqrtMasked(mask Mask64x8) Float64x8 /* Sub */ // Sub subtracts corresponding elements of two vectors. // // Asm: VSUBPS, CPU Feature: AVX func (x Float32x4) Sub(y Float32x4) Float32x4 // Sub subtracts corresponding elements of two vectors. // // Asm: VSUBPS, CPU Feature: AVX func (x Float32x8) Sub(y Float32x8) Float32x8 // Sub subtracts corresponding elements of two vectors. // // Asm: VSUBPS, CPU Feature: AVX512F func (x Float32x16) Sub(y Float32x16) Float32x16 // Sub subtracts corresponding elements of two vectors. // // Asm: VSUBPD, CPU Feature: AVX func (x Float64x2) Sub(y Float64x2) Float64x2 // Sub subtracts corresponding elements of two vectors. // // Asm: VSUBPD, CPU Feature: AVX func (x Float64x4) Sub(y Float64x4) Float64x4 // Sub subtracts corresponding elements of two vectors. // // Asm: VSUBPD, CPU Feature: AVX512F func (x Float64x8) Sub(y Float64x8) Float64x8 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBB, CPU Feature: AVX func (x Int8x16) Sub(y Int8x16) Int8x16 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBB, CPU Feature: AVX2 func (x Int8x32) Sub(y Int8x32) Int8x32 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBB, CPU Feature: AVX512BW func (x Int8x64) Sub(y Int8x64) Int8x64 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBW, CPU Feature: AVX func (x Int16x8) Sub(y Int16x8) Int16x8 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBW, CPU Feature: AVX2 func (x Int16x16) Sub(y Int16x16) Int16x16 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBW, CPU Feature: AVX512BW func (x Int16x32) Sub(y Int16x32) Int16x32 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBD, CPU Feature: AVX func (x Int32x4) Sub(y Int32x4) Int32x4 // Sub subtracts corresponding elements of two vectors. 
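// Editorial sketch (not generated): SqrtMasked lets the caller restrict the
// square root to chosen lanes, for example via a mask that selects only
// non-negative inputs. The helper name is ours; the mask is assumed to be
// built elsewhere.
func guardedSqrt(x Float64x4, nonNegative Mask64x4) Float64x4 {
	return x.SqrtMasked(nonNegative)
}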
// // Asm: VPSUBD, CPU Feature: AVX2 func (x Int32x8) Sub(y Int32x8) Int32x8 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBD, CPU Feature: AVX512F func (x Int32x16) Sub(y Int32x16) Int32x16 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBQ, CPU Feature: AVX func (x Int64x2) Sub(y Int64x2) Int64x2 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBQ, CPU Feature: AVX2 func (x Int64x4) Sub(y Int64x4) Int64x4 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBQ, CPU Feature: AVX512F func (x Int64x8) Sub(y Int64x8) Int64x8 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBB, CPU Feature: AVX func (x Uint8x16) Sub(y Uint8x16) Uint8x16 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBB, CPU Feature: AVX2 func (x Uint8x32) Sub(y Uint8x32) Uint8x32 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBB, CPU Feature: AVX512BW func (x Uint8x64) Sub(y Uint8x64) Uint8x64 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBW, CPU Feature: AVX func (x Uint16x8) Sub(y Uint16x8) Uint16x8 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBW, CPU Feature: AVX2 func (x Uint16x16) Sub(y Uint16x16) Uint16x16 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBW, CPU Feature: AVX512BW func (x Uint16x32) Sub(y Uint16x32) Uint16x32 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBD, CPU Feature: AVX func (x Uint32x4) Sub(y Uint32x4) Uint32x4 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBD, CPU Feature: AVX2 func (x Uint32x8) Sub(y Uint32x8) Uint32x8 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBD, CPU Feature: AVX512F func (x Uint32x16) Sub(y Uint32x16) Uint32x16 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBQ, CPU Feature: AVX func (x Uint64x2) Sub(y Uint64x2) Uint64x2 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBQ, CPU Feature: AVX2 func (x Uint64x4) Sub(y Uint64x4) Uint64x4 // Sub subtracts corresponding elements of two vectors. // // Asm: VPSUBQ, CPU Feature: AVX512F func (x Uint64x8) Sub(y Uint64x8) Uint64x8 /* SubMasked */ // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VSUBPS, CPU Feature: AVX512F func (x Float32x4) SubMasked(y Float32x4, mask Mask32x4) Float32x4 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VSUBPS, CPU Feature: AVX512F func (x Float32x8) SubMasked(y Float32x8, mask Mask32x8) Float32x8 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VSUBPS, CPU Feature: AVX512F func (x Float32x16) SubMasked(y Float32x16, mask Mask32x16) Float32x16 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VSUBPD, CPU Feature: AVX512F func (x Float64x2) SubMasked(y Float64x2, mask Mask64x2) Float64x2 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VSUBPD, CPU Feature: AVX512F func (x Float64x4) SubMasked(y Float64x4, mask Mask64x4) Float64x4 // SubMasked subtracts corresponding elements of two vectors. 
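// Editorial sketch (not generated): element-wise difference. For the integer
// types, Sub wraps on overflow (modular arithmetic) rather than saturating.
// The helper name is ours.
func delta(a, b Int32x8) Int32x8 {
	return a.Sub(b)
}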
// // This operation is applied selectively under a write mask. // // Asm: VSUBPD, CPU Feature: AVX512F func (x Float64x8) SubMasked(y Float64x8, mask Mask64x8) Float64x8 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBB, CPU Feature: AVX512BW func (x Int8x16) SubMasked(y Int8x16, mask Mask8x16) Int8x16 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBB, CPU Feature: AVX512BW func (x Int8x32) SubMasked(y Int8x32, mask Mask8x32) Int8x32 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBB, CPU Feature: AVX512BW func (x Int8x64) SubMasked(y Int8x64, mask Mask8x64) Int8x64 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBW, CPU Feature: AVX512BW func (x Int16x8) SubMasked(y Int16x8, mask Mask16x8) Int16x8 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBW, CPU Feature: AVX512BW func (x Int16x16) SubMasked(y Int16x16, mask Mask16x16) Int16x16 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBW, CPU Feature: AVX512BW func (x Int16x32) SubMasked(y Int16x32, mask Mask16x32) Int16x32 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBD, CPU Feature: AVX512F func (x Int32x4) SubMasked(y Int32x4, mask Mask32x4) Int32x4 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBD, CPU Feature: AVX512F func (x Int32x8) SubMasked(y Int32x8, mask Mask32x8) Int32x8 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBD, CPU Feature: AVX512F func (x Int32x16) SubMasked(y Int32x16, mask Mask32x16) Int32x16 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBQ, CPU Feature: AVX512F func (x Int64x2) SubMasked(y Int64x2, mask Mask64x2) Int64x2 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBQ, CPU Feature: AVX512F func (x Int64x4) SubMasked(y Int64x4, mask Mask64x4) Int64x4 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBQ, CPU Feature: AVX512F func (x Int64x8) SubMasked(y Int64x8, mask Mask64x8) Int64x8 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBB, CPU Feature: AVX512BW func (x Uint8x16) SubMasked(y Uint8x16, mask Mask8x16) Uint8x16 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBB, CPU Feature: AVX512BW func (x Uint8x32) SubMasked(y Uint8x32, mask Mask8x32) Uint8x32 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. 
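// Editorial sketch (not generated): the masked form of Sub, with the write
// mask supplied by the caller; only the selected lanes receive a - b. The
// helper name is ours.
func deltaMasked(a, b Int32x8, m Mask32x8) Int32x8 {
	return a.SubMasked(b, m)
}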
// // Asm: VPSUBB, CPU Feature: AVX512BW func (x Uint8x64) SubMasked(y Uint8x64, mask Mask8x64) Uint8x64 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBW, CPU Feature: AVX512BW func (x Uint16x8) SubMasked(y Uint16x8, mask Mask16x8) Uint16x8 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBW, CPU Feature: AVX512BW func (x Uint16x16) SubMasked(y Uint16x16, mask Mask16x16) Uint16x16 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBW, CPU Feature: AVX512BW func (x Uint16x32) SubMasked(y Uint16x32, mask Mask16x32) Uint16x32 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBD, CPU Feature: AVX512F func (x Uint32x4) SubMasked(y Uint32x4, mask Mask32x4) Uint32x4 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBD, CPU Feature: AVX512F func (x Uint32x8) SubMasked(y Uint32x8, mask Mask32x8) Uint32x8 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBD, CPU Feature: AVX512F func (x Uint32x16) SubMasked(y Uint32x16, mask Mask32x16) Uint32x16 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBQ, CPU Feature: AVX512F func (x Uint64x2) SubMasked(y Uint64x2, mask Mask64x2) Uint64x2 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBQ, CPU Feature: AVX512F func (x Uint64x4) SubMasked(y Uint64x4, mask Mask64x4) Uint64x4 // SubMasked subtracts corresponding elements of two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPSUBQ, CPU Feature: AVX512F func (x Uint64x8) SubMasked(y Uint64x8, mask Mask64x8) Uint64x8 /* Trunc */ // Trunc truncates elements towards zero. // // Asm: VROUNDPS, CPU Feature: AVX func (x Float32x4) Trunc() Float32x4 // Trunc truncates elements towards zero. // // Asm: VROUNDPS, CPU Feature: AVX func (x Float32x8) Trunc() Float32x8 // Trunc truncates elements towards zero. // // Asm: VROUNDPD, CPU Feature: AVX func (x Float64x2) Trunc() Float64x2 // Trunc truncates elements towards zero. // // Asm: VROUNDPD, CPU Feature: AVX func (x Float64x4) Trunc() Float64x4 /* TruncWithPrecision */ // TruncWithPrecision truncates elements with the specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x4) TruncWithPrecision(prec uint8) Float32x4 // TruncWithPrecision truncates elements with the specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x8) TruncWithPrecision(prec uint8) Float32x8 // TruncWithPrecision truncates elements with the specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x16) TruncWithPrecision(prec uint8) Float32x16 // TruncWithPrecision truncates elements with the specified precision.
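// Editorial sketch (not generated): Trunc rounds toward zero to a whole
// number, while TruncWithPrecision truncates to a multiple of 2^-prec (our
// reading of VRNDSCALE's scaling; prec must be a compile-time constant). The
// helper name is ours.
func truncExamples(x Float32x4) (whole, quarters Float32x4) {
	whole = x.Trunc()                  // e.g. 3.9 -> 3.0, -3.9 -> -3.0
	quarters = x.TruncWithPrecision(2) // truncate to multiples of 0.25
	return
}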
// // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x2) TruncWithPrecision(prec uint8) Float64x2 // TruncWithPrecision truncates elements with the specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x4) TruncWithPrecision(prec uint8) Float64x4 // TruncWithPrecision truncates elements with the specified precision. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x8) TruncWithPrecision(prec uint8) Float64x8 /* TruncWithPrecisionMasked */ // TruncWithPrecisionMasked truncates elements with the specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x4) TruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4 // TruncWithPrecisionMasked truncates elements with the specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x8) TruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8 // TruncWithPrecisionMasked truncates elements with the specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPS, CPU Feature: AVX512F func (x Float32x16) TruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16 // TruncWithPrecisionMasked truncates elements with the specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x2) TruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2 // TruncWithPrecisionMasked truncates elements with the specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x4) TruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4 // TruncWithPrecisionMasked truncates elements with the specified precision. // // This operation is applied selectively under a write mask. // // prec is expected to be a constant; a non-constant value will trigger a runtime panic. // // Asm: VRNDSCALEPD, CPU Feature: AVX512F func (x Float64x8) TruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8 /* UnsignedSignedQuadDotProdAccumulate */ // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. // // Asm: VPDPBUSD, CPU Feature: AVXVNNI func (x Int8x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4) Int32x4 // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. // // Asm: VPDPBUSD, CPU Feature: AVXVNNI func (x Int8x32) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8) Int32x8 // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
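// Editorial sketch (not generated): the masked precision truncation, applied
// only in the lanes selected by m. The helper name is ours.
func truncEighthsMasked(x Float64x2, m Mask64x2) Float64x2 {
	return x.TruncWithPrecisionMasked(3, m) // multiples of 0.125 in selected lanes
}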
// // Asm: VPDPBUSD, CPU Feature: AVX512VNNI func (x Int8x64) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16 /* UnsignedSignedQuadDotProdAccumulateMasked */ // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. // // This operation is applied selectively under a write mask. // // Asm: VPDPBUSD, CPU Feature: AVX512VNNI func (x Int8x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4 // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. // // This operation is applied selectively under a write mask. // // Asm: VPDPBUSD, CPU Feature: AVX512VNNI func (x Int8x32) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8 // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. // // This operation is applied selectively under a write mask. // // Asm: VPDPBUSD, CPU Feature: AVX512VNNI func (x Int8x64) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16 /* Xor */ // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX func (x Int8x16) Xor(y Int8x16) Int8x16 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX2 func (x Int8x32) Xor(y Int8x32) Int8x32 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX func (x Int16x8) Xor(y Int16x8) Int16x8 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX2 func (x Int16x16) Xor(y Int16x16) Int16x16 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX func (x Int32x4) Xor(y Int32x4) Int32x4 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX2 func (x Int32x8) Xor(y Int32x8) Int32x8 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXORD, CPU Feature: AVX512F func (x Int32x16) Xor(y Int32x16) Int32x16 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX func (x Int64x2) Xor(y Int64x2) Int64x2 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX2 func (x Int64x4) Xor(y Int64x4) Int64x4 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXORQ, CPU Feature: AVX512F func (x Int64x8) Xor(y Int64x8) Int64x8 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX func (x Uint8x16) Xor(y Uint8x16) Uint8x16 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX2 func (x Uint8x32) Xor(y Uint8x32) Uint8x32 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX func (x Uint16x8) Xor(y Uint16x8) Uint16x8 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX2 func (x Uint16x16) Xor(y Uint16x16) Uint16x16 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX func (x Uint32x4) Xor(y Uint32x4) Uint32x4 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX2 func (x Uint32x8) Xor(y Uint32x8) Uint32x8 // Xor performs a bitwise XOR operation between two vectors. 
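// Editorial sketch (not generated): the VNNI dot-product step as used in
// quantized (int8) matrix kernels; each 32-bit lane of the result is acc plus
// the sum of four adjacent byte products from x and y. The helper name is
// ours.
func dotAccumulate(x Int8x16, y Uint8x16, acc Int32x4) Int32x4 {
	return x.UnsignedSignedQuadDotProdAccumulate(y, acc)
}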
// // Asm: VPXORD, CPU Feature: AVX512F func (x Uint32x16) Xor(y Uint32x16) Uint32x16 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX func (x Uint64x2) Xor(y Uint64x2) Uint64x2 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX2 func (x Uint64x4) Xor(y Uint64x4) Uint64x4 // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXORQ, CPU Feature: AVX512F func (x Uint64x8) Xor(y Uint64x8) Uint64x8 /* XorMasked */ // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORD, CPU Feature: AVX512F func (x Int32x4) XorMasked(y Int32x4, mask Mask32x4) Int32x4 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORD, CPU Feature: AVX512F func (x Int32x8) XorMasked(y Int32x8, mask Mask32x8) Int32x8 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORD, CPU Feature: AVX512F func (x Int32x16) XorMasked(y Int32x16, mask Mask32x16) Int32x16 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORQ, CPU Feature: AVX512F func (x Int64x2) XorMasked(y Int64x2, mask Mask64x2) Int64x2 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORQ, CPU Feature: AVX512F func (x Int64x4) XorMasked(y Int64x4, mask Mask64x4) Int64x4 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORQ, CPU Feature: AVX512F func (x Int64x8) XorMasked(y Int64x8, mask Mask64x8) Int64x8 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORD, CPU Feature: AVX512F func (x Uint32x4) XorMasked(y Uint32x4, mask Mask32x4) Uint32x4 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORD, CPU Feature: AVX512F func (x Uint32x8) XorMasked(y Uint32x8, mask Mask32x8) Uint32x8 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORD, CPU Feature: AVX512F func (x Uint32x16) XorMasked(y Uint32x16, mask Mask32x16) Uint32x16 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORQ, CPU Feature: AVX512F func (x Uint64x2) XorMasked(y Uint64x2, mask Mask64x2) Uint64x2 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. // // Asm: VPXORQ, CPU Feature: AVX512F func (x Uint64x4) XorMasked(y Uint64x4, mask Mask64x4) Uint64x4 // XorMasked performs a bitwise XOR operation between two vectors. // // This operation is applied selectively under a write mask. 
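// Editorial sketch (not generated): Xor flips exactly the bits that are set
// in the second operand, a cheap way to toggle flags across all lanes; the
// masked variant (XorMasked) does the same only in selected lanes. The helper
// name is ours.
func toggleBits(v, bits Uint32x8) Uint32x8 {
	return v.Xor(bits)
}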
// // Asm: VPXORQ, CPU Feature: AVX512F func (x Uint64x8) XorMasked(y Uint64x8, mask Mask64x8) Uint64x8 // Float64x2 converts from Float32x4 to Float64x2 func (from Float32x4) AsFloat64x2() (to Float64x2) // Int8x16 converts from Float32x4 to Int8x16 func (from Float32x4) AsInt8x16() (to Int8x16) // Int16x8 converts from Float32x4 to Int16x8 func (from Float32x4) AsInt16x8() (to Int16x8) // Int32x4 converts from Float32x4 to Int32x4 func (from Float32x4) AsInt32x4() (to Int32x4) // Int64x2 converts from Float32x4 to Int64x2 func (from Float32x4) AsInt64x2() (to Int64x2) // Uint8x16 converts from Float32x4 to Uint8x16 func (from Float32x4) AsUint8x16() (to Uint8x16) // Uint16x8 converts from Float32x4 to Uint16x8 func (from Float32x4) AsUint16x8() (to Uint16x8) // Uint32x4 converts from Float32x4 to Uint32x4 func (from Float32x4) AsUint32x4() (to Uint32x4) // Uint64x2 converts from Float32x4 to Uint64x2 func (from Float32x4) AsUint64x2() (to Uint64x2) // Float64x4 converts from Float32x8 to Float64x4 func (from Float32x8) AsFloat64x4() (to Float64x4) // Int8x32 converts from Float32x8 to Int8x32 func (from Float32x8) AsInt8x32() (to Int8x32) // Int16x16 converts from Float32x8 to Int16x16 func (from Float32x8) AsInt16x16() (to Int16x16) // Int32x8 converts from Float32x8 to Int32x8 func (from Float32x8) AsInt32x8() (to Int32x8) // Int64x4 converts from Float32x8 to Int64x4 func (from Float32x8) AsInt64x4() (to Int64x4) // Uint8x32 converts from Float32x8 to Uint8x32 func (from Float32x8) AsUint8x32() (to Uint8x32) // Uint16x16 converts from Float32x8 to Uint16x16 func (from Float32x8) AsUint16x16() (to Uint16x16) // Uint32x8 converts from Float32x8 to Uint32x8 func (from Float32x8) AsUint32x8() (to Uint32x8) // Uint64x4 converts from Float32x8 to Uint64x4 func (from Float32x8) AsUint64x4() (to Uint64x4) // Float64x8 converts from Float32x16 to Float64x8 func (from Float32x16) AsFloat64x8() (to Float64x8) // Int8x64 converts from Float32x16 to Int8x64 func (from Float32x16) AsInt8x64() (to Int8x64) // Int16x32 converts from Float32x16 to Int16x32 func (from Float32x16) AsInt16x32() (to Int16x32) // Int32x16 converts from Float32x16 to Int32x16 func (from Float32x16) AsInt32x16() (to Int32x16) // Int64x8 converts from Float32x16 to Int64x8 func (from Float32x16) AsInt64x8() (to Int64x8) // Uint8x64 converts from Float32x16 to Uint8x64 func (from Float32x16) AsUint8x64() (to Uint8x64) // Uint16x32 converts from Float32x16 to Uint16x32 func (from Float32x16) AsUint16x32() (to Uint16x32) // Uint32x16 converts from Float32x16 to Uint32x16 func (from Float32x16) AsUint32x16() (to Uint32x16) // Uint64x8 converts from Float32x16 to Uint64x8 func (from Float32x16) AsUint64x8() (to Uint64x8) // Float32x4 converts from Float64x2 to Float32x4 func (from Float64x2) AsFloat32x4() (to Float32x4) // Int8x16 converts from Float64x2 to Int8x16 func (from Float64x2) AsInt8x16() (to Int8x16) // Int16x8 converts from Float64x2 to Int16x8 func (from Float64x2) AsInt16x8() (to Int16x8) // Int32x4 converts from Float64x2 to Int32x4 func (from Float64x2) AsInt32x4() (to Int32x4) // Int64x2 converts from Float64x2 to Int64x2 func (from Float64x2) AsInt64x2() (to Int64x2) // Uint8x16 converts from Float64x2 to Uint8x16 func (from Float64x2) AsUint8x16() (to Uint8x16) // Uint16x8 converts from Float64x2 to Uint16x8 func (from Float64x2) AsUint16x8() (to Uint16x8) // Uint32x4 converts from Float64x2 to Uint32x4 func (from Float64x2) AsUint32x4() (to Uint32x4) // Uint64x2 converts from Float64x2 to Uint64x2 func (from 
Float64x2) AsUint64x2() (to Uint64x2) // Float32x8 converts from Float64x4 to Float32x8 func (from Float64x4) AsFloat32x8() (to Float32x8) // Int8x32 converts from Float64x4 to Int8x32 func (from Float64x4) AsInt8x32() (to Int8x32) // Int16x16 converts from Float64x4 to Int16x16 func (from Float64x4) AsInt16x16() (to Int16x16) // Int32x8 converts from Float64x4 to Int32x8 func (from Float64x4) AsInt32x8() (to Int32x8) // Int64x4 converts from Float64x4 to Int64x4 func (from Float64x4) AsInt64x4() (to Int64x4) // Uint8x32 converts from Float64x4 to Uint8x32 func (from Float64x4) AsUint8x32() (to Uint8x32) // Uint16x16 converts from Float64x4 to Uint16x16 func (from Float64x4) AsUint16x16() (to Uint16x16) // Uint32x8 converts from Float64x4 to Uint32x8 func (from Float64x4) AsUint32x8() (to Uint32x8) // Uint64x4 converts from Float64x4 to Uint64x4 func (from Float64x4) AsUint64x4() (to Uint64x4) // Float32x16 converts from Float64x8 to Float32x16 func (from Float64x8) AsFloat32x16() (to Float32x16) // Int8x64 converts from Float64x8 to Int8x64 func (from Float64x8) AsInt8x64() (to Int8x64) // Int16x32 converts from Float64x8 to Int16x32 func (from Float64x8) AsInt16x32() (to Int16x32) // Int32x16 converts from Float64x8 to Int32x16 func (from Float64x8) AsInt32x16() (to Int32x16) // Int64x8 converts from Float64x8 to Int64x8 func (from Float64x8) AsInt64x8() (to Int64x8) // Uint8x64 converts from Float64x8 to Uint8x64 func (from Float64x8) AsUint8x64() (to Uint8x64) // Uint16x32 converts from Float64x8 to Uint16x32 func (from Float64x8) AsUint16x32() (to Uint16x32) // Uint32x16 converts from Float64x8 to Uint32x16 func (from Float64x8) AsUint32x16() (to Uint32x16) // Uint64x8 converts from Float64x8 to Uint64x8 func (from Float64x8) AsUint64x8() (to Uint64x8) // Float32x4 converts from Int8x16 to Float32x4 func (from Int8x16) AsFloat32x4() (to Float32x4) // Float64x2 converts from Int8x16 to Float64x2 func (from Int8x16) AsFloat64x2() (to Float64x2) // Int16x8 converts from Int8x16 to Int16x8 func (from Int8x16) AsInt16x8() (to Int16x8) // Int32x4 converts from Int8x16 to Int32x4 func (from Int8x16) AsInt32x4() (to Int32x4) // Int64x2 converts from Int8x16 to Int64x2 func (from Int8x16) AsInt64x2() (to Int64x2) // Uint8x16 converts from Int8x16 to Uint8x16 func (from Int8x16) AsUint8x16() (to Uint8x16) // Uint16x8 converts from Int8x16 to Uint16x8 func (from Int8x16) AsUint16x8() (to Uint16x8) // Uint32x4 converts from Int8x16 to Uint32x4 func (from Int8x16) AsUint32x4() (to Uint32x4) // Uint64x2 converts from Int8x16 to Uint64x2 func (from Int8x16) AsUint64x2() (to Uint64x2) // Float32x8 converts from Int8x32 to Float32x8 func (from Int8x32) AsFloat32x8() (to Float32x8) // Float64x4 converts from Int8x32 to Float64x4 func (from Int8x32) AsFloat64x4() (to Float64x4) // Int16x16 converts from Int8x32 to Int16x16 func (from Int8x32) AsInt16x16() (to Int16x16) // Int32x8 converts from Int8x32 to Int32x8 func (from Int8x32) AsInt32x8() (to Int32x8) // Int64x4 converts from Int8x32 to Int64x4 func (from Int8x32) AsInt64x4() (to Int64x4) // Uint8x32 converts from Int8x32 to Uint8x32 func (from Int8x32) AsUint8x32() (to Uint8x32) // Uint16x16 converts from Int8x32 to Uint16x16 func (from Int8x32) AsUint16x16() (to Uint16x16) // Uint32x8 converts from Int8x32 to Uint32x8 func (from Int8x32) AsUint32x8() (to Uint32x8) // Uint64x4 converts from Int8x32 to Uint64x4 func (from Int8x32) AsUint64x4() (to Uint64x4) // Float32x16 converts from Int8x64 to Float32x16 func (from Int8x64) AsFloat32x16() (to 
Float32x16) // Float64x8 converts from Int8x64 to Float64x8 func (from Int8x64) AsFloat64x8() (to Float64x8) // Int16x32 converts from Int8x64 to Int16x32 func (from Int8x64) AsInt16x32() (to Int16x32) // Int32x16 converts from Int8x64 to Int32x16 func (from Int8x64) AsInt32x16() (to Int32x16) // Int64x8 converts from Int8x64 to Int64x8 func (from Int8x64) AsInt64x8() (to Int64x8) // Uint8x64 converts from Int8x64 to Uint8x64 func (from Int8x64) AsUint8x64() (to Uint8x64) // Uint16x32 converts from Int8x64 to Uint16x32 func (from Int8x64) AsUint16x32() (to Uint16x32) // Uint32x16 converts from Int8x64 to Uint32x16 func (from Int8x64) AsUint32x16() (to Uint32x16) // Uint64x8 converts from Int8x64 to Uint64x8 func (from Int8x64) AsUint64x8() (to Uint64x8) // Float32x4 converts from Int16x8 to Float32x4 func (from Int16x8) AsFloat32x4() (to Float32x4) // Float64x2 converts from Int16x8 to Float64x2 func (from Int16x8) AsFloat64x2() (to Float64x2) // Int8x16 converts from Int16x8 to Int8x16 func (from Int16x8) AsInt8x16() (to Int8x16) // Int32x4 converts from Int16x8 to Int32x4 func (from Int16x8) AsInt32x4() (to Int32x4) // Int64x2 converts from Int16x8 to Int64x2 func (from Int16x8) AsInt64x2() (to Int64x2) // Uint8x16 converts from Int16x8 to Uint8x16 func (from Int16x8) AsUint8x16() (to Uint8x16) // Uint16x8 converts from Int16x8 to Uint16x8 func (from Int16x8) AsUint16x8() (to Uint16x8) // Uint32x4 converts from Int16x8 to Uint32x4 func (from Int16x8) AsUint32x4() (to Uint32x4) // Uint64x2 converts from Int16x8 to Uint64x2 func (from Int16x8) AsUint64x2() (to Uint64x2) // Float32x8 converts from Int16x16 to Float32x8 func (from Int16x16) AsFloat32x8() (to Float32x8) // Float64x4 converts from Int16x16 to Float64x4 func (from Int16x16) AsFloat64x4() (to Float64x4) // Int8x32 converts from Int16x16 to Int8x32 func (from Int16x16) AsInt8x32() (to Int8x32) // Int32x8 converts from Int16x16 to Int32x8 func (from Int16x16) AsInt32x8() (to Int32x8) // Int64x4 converts from Int16x16 to Int64x4 func (from Int16x16) AsInt64x4() (to Int64x4) // Uint8x32 converts from Int16x16 to Uint8x32 func (from Int16x16) AsUint8x32() (to Uint8x32) // Uint16x16 converts from Int16x16 to Uint16x16 func (from Int16x16) AsUint16x16() (to Uint16x16) // Uint32x8 converts from Int16x16 to Uint32x8 func (from Int16x16) AsUint32x8() (to Uint32x8) // Uint64x4 converts from Int16x16 to Uint64x4 func (from Int16x16) AsUint64x4() (to Uint64x4) // Float32x16 converts from Int16x32 to Float32x16 func (from Int16x32) AsFloat32x16() (to Float32x16) // Float64x8 converts from Int16x32 to Float64x8 func (from Int16x32) AsFloat64x8() (to Float64x8) // Int8x64 converts from Int16x32 to Int8x64 func (from Int16x32) AsInt8x64() (to Int8x64) // Int32x16 converts from Int16x32 to Int32x16 func (from Int16x32) AsInt32x16() (to Int32x16) // Int64x8 converts from Int16x32 to Int64x8 func (from Int16x32) AsInt64x8() (to Int64x8) // Uint8x64 converts from Int16x32 to Uint8x64 func (from Int16x32) AsUint8x64() (to Uint8x64) // Uint16x32 converts from Int16x32 to Uint16x32 func (from Int16x32) AsUint16x32() (to Uint16x32) // Uint32x16 converts from Int16x32 to Uint32x16 func (from Int16x32) AsUint32x16() (to Uint32x16) // Uint64x8 converts from Int16x32 to Uint64x8 func (from Int16x32) AsUint64x8() (to Uint64x8) // Float32x4 converts from Int32x4 to Float32x4 func (from Int32x4) AsFloat32x4() (to Float32x4) // Float64x2 converts from Int32x4 to Float64x2 func (from Int32x4) AsFloat64x2() (to Float64x2) // Int8x16 converts from Int32x4 to 
Int8x16 func (from Int32x4) AsInt8x16() (to Int8x16) // Int16x8 converts from Int32x4 to Int16x8 func (from Int32x4) AsInt16x8() (to Int16x8) // Int64x2 converts from Int32x4 to Int64x2 func (from Int32x4) AsInt64x2() (to Int64x2) // Uint8x16 converts from Int32x4 to Uint8x16 func (from Int32x4) AsUint8x16() (to Uint8x16) // Uint16x8 converts from Int32x4 to Uint16x8 func (from Int32x4) AsUint16x8() (to Uint16x8) // Uint32x4 converts from Int32x4 to Uint32x4 func (from Int32x4) AsUint32x4() (to Uint32x4) // Uint64x2 converts from Int32x4 to Uint64x2 func (from Int32x4) AsUint64x2() (to Uint64x2) // Float32x8 converts from Int32x8 to Float32x8 func (from Int32x8) AsFloat32x8() (to Float32x8) // Float64x4 converts from Int32x8 to Float64x4 func (from Int32x8) AsFloat64x4() (to Float64x4) // Int8x32 converts from Int32x8 to Int8x32 func (from Int32x8) AsInt8x32() (to Int8x32) // Int16x16 converts from Int32x8 to Int16x16 func (from Int32x8) AsInt16x16() (to Int16x16) // Int64x4 converts from Int32x8 to Int64x4 func (from Int32x8) AsInt64x4() (to Int64x4) // Uint8x32 converts from Int32x8 to Uint8x32 func (from Int32x8) AsUint8x32() (to Uint8x32) // Uint16x16 converts from Int32x8 to Uint16x16 func (from Int32x8) AsUint16x16() (to Uint16x16) // Uint32x8 converts from Int32x8 to Uint32x8 func (from Int32x8) AsUint32x8() (to Uint32x8) // Uint64x4 converts from Int32x8 to Uint64x4 func (from Int32x8) AsUint64x4() (to Uint64x4) // Float32x16 converts from Int32x16 to Float32x16 func (from Int32x16) AsFloat32x16() (to Float32x16) // Float64x8 converts from Int32x16 to Float64x8 func (from Int32x16) AsFloat64x8() (to Float64x8) // Int8x64 converts from Int32x16 to Int8x64 func (from Int32x16) AsInt8x64() (to Int8x64) // Int16x32 converts from Int32x16 to Int16x32 func (from Int32x16) AsInt16x32() (to Int16x32) // Int64x8 converts from Int32x16 to Int64x8 func (from Int32x16) AsInt64x8() (to Int64x8) // Uint8x64 converts from Int32x16 to Uint8x64 func (from Int32x16) AsUint8x64() (to Uint8x64) // Uint16x32 converts from Int32x16 to Uint16x32 func (from Int32x16) AsUint16x32() (to Uint16x32) // Uint32x16 converts from Int32x16 to Uint32x16 func (from Int32x16) AsUint32x16() (to Uint32x16) // Uint64x8 converts from Int32x16 to Uint64x8 func (from Int32x16) AsUint64x8() (to Uint64x8) // Float32x4 converts from Int64x2 to Float32x4 func (from Int64x2) AsFloat32x4() (to Float32x4) // Float64x2 converts from Int64x2 to Float64x2 func (from Int64x2) AsFloat64x2() (to Float64x2) // Int8x16 converts from Int64x2 to Int8x16 func (from Int64x2) AsInt8x16() (to Int8x16) // Int16x8 converts from Int64x2 to Int16x8 func (from Int64x2) AsInt16x8() (to Int16x8) // Int32x4 converts from Int64x2 to Int32x4 func (from Int64x2) AsInt32x4() (to Int32x4) // Uint8x16 converts from Int64x2 to Uint8x16 func (from Int64x2) AsUint8x16() (to Uint8x16) // Uint16x8 converts from Int64x2 to Uint16x8 func (from Int64x2) AsUint16x8() (to Uint16x8) // Uint32x4 converts from Int64x2 to Uint32x4 func (from Int64x2) AsUint32x4() (to Uint32x4) // Uint64x2 converts from Int64x2 to Uint64x2 func (from Int64x2) AsUint64x2() (to Uint64x2) // Float32x8 converts from Int64x4 to Float32x8 func (from Int64x4) AsFloat32x8() (to Float32x8) // Float64x4 converts from Int64x4 to Float64x4 func (from Int64x4) AsFloat64x4() (to Float64x4) // Int8x32 converts from Int64x4 to Int8x32 func (from Int64x4) AsInt8x32() (to Int8x32) // Int16x16 converts from Int64x4 to Int16x16 func (from Int64x4) AsInt16x16() (to Int16x16) // Int32x8 converts from Int64x4 
to Int32x8 func (from Int64x4) AsInt32x8() (to Int32x8) // Uint8x32 converts from Int64x4 to Uint8x32 func (from Int64x4) AsUint8x32() (to Uint8x32) // Uint16x16 converts from Int64x4 to Uint16x16 func (from Int64x4) AsUint16x16() (to Uint16x16) // Uint32x8 converts from Int64x4 to Uint32x8 func (from Int64x4) AsUint32x8() (to Uint32x8) // Uint64x4 converts from Int64x4 to Uint64x4 func (from Int64x4) AsUint64x4() (to Uint64x4) // Float32x16 converts from Int64x8 to Float32x16 func (from Int64x8) AsFloat32x16() (to Float32x16) // Float64x8 converts from Int64x8 to Float64x8 func (from Int64x8) AsFloat64x8() (to Float64x8) // Int8x64 converts from Int64x8 to Int8x64 func (from Int64x8) AsInt8x64() (to Int8x64) // Int16x32 converts from Int64x8 to Int16x32 func (from Int64x8) AsInt16x32() (to Int16x32) // Int32x16 converts from Int64x8 to Int32x16 func (from Int64x8) AsInt32x16() (to Int32x16) // Uint8x64 converts from Int64x8 to Uint8x64 func (from Int64x8) AsUint8x64() (to Uint8x64) // Uint16x32 converts from Int64x8 to Uint16x32 func (from Int64x8) AsUint16x32() (to Uint16x32) // Uint32x16 converts from Int64x8 to Uint32x16 func (from Int64x8) AsUint32x16() (to Uint32x16) // Uint64x8 converts from Int64x8 to Uint64x8 func (from Int64x8) AsUint64x8() (to Uint64x8) // Float32x4 converts from Uint8x16 to Float32x4 func (from Uint8x16) AsFloat32x4() (to Float32x4) // Float64x2 converts from Uint8x16 to Float64x2 func (from Uint8x16) AsFloat64x2() (to Float64x2) // Int8x16 converts from Uint8x16 to Int8x16 func (from Uint8x16) AsInt8x16() (to Int8x16) // Int16x8 converts from Uint8x16 to Int16x8 func (from Uint8x16) AsInt16x8() (to Int16x8) // Int32x4 converts from Uint8x16 to Int32x4 func (from Uint8x16) AsInt32x4() (to Int32x4) // Int64x2 converts from Uint8x16 to Int64x2 func (from Uint8x16) AsInt64x2() (to Int64x2) // Uint16x8 converts from Uint8x16 to Uint16x8 func (from Uint8x16) AsUint16x8() (to Uint16x8) // Uint32x4 converts from Uint8x16 to Uint32x4 func (from Uint8x16) AsUint32x4() (to Uint32x4) // Uint64x2 converts from Uint8x16 to Uint64x2 func (from Uint8x16) AsUint64x2() (to Uint64x2) // Float32x8 converts from Uint8x32 to Float32x8 func (from Uint8x32) AsFloat32x8() (to Float32x8) // Float64x4 converts from Uint8x32 to Float64x4 func (from Uint8x32) AsFloat64x4() (to Float64x4) // Int8x32 converts from Uint8x32 to Int8x32 func (from Uint8x32) AsInt8x32() (to Int8x32) // Int16x16 converts from Uint8x32 to Int16x16 func (from Uint8x32) AsInt16x16() (to Int16x16) // Int32x8 converts from Uint8x32 to Int32x8 func (from Uint8x32) AsInt32x8() (to Int32x8) // Int64x4 converts from Uint8x32 to Int64x4 func (from Uint8x32) AsInt64x4() (to Int64x4) // Uint16x16 converts from Uint8x32 to Uint16x16 func (from Uint8x32) AsUint16x16() (to Uint16x16) // Uint32x8 converts from Uint8x32 to Uint32x8 func (from Uint8x32) AsUint32x8() (to Uint32x8) // Uint64x4 converts from Uint8x32 to Uint64x4 func (from Uint8x32) AsUint64x4() (to Uint64x4) // Float32x16 converts from Uint8x64 to Float32x16 func (from Uint8x64) AsFloat32x16() (to Float32x16) // Float64x8 converts from Uint8x64 to Float64x8 func (from Uint8x64) AsFloat64x8() (to Float64x8) // Int8x64 converts from Uint8x64 to Int8x64 func (from Uint8x64) AsInt8x64() (to Int8x64) // Int16x32 converts from Uint8x64 to Int16x32 func (from Uint8x64) AsInt16x32() (to Int16x32) // Int32x16 converts from Uint8x64 to Int32x16 func (from Uint8x64) AsInt32x16() (to Int32x16) // Int64x8 converts from Uint8x64 to Int64x8 func (from Uint8x64) AsInt64x8() (to 
Int64x8) // Uint16x32 converts from Uint8x64 to Uint16x32 func (from Uint8x64) AsUint16x32() (to Uint16x32) // Uint32x16 converts from Uint8x64 to Uint32x16 func (from Uint8x64) AsUint32x16() (to Uint32x16) // Uint64x8 converts from Uint8x64 to Uint64x8 func (from Uint8x64) AsUint64x8() (to Uint64x8) // Float32x4 converts from Uint16x8 to Float32x4 func (from Uint16x8) AsFloat32x4() (to Float32x4) // Float64x2 converts from Uint16x8 to Float64x2 func (from Uint16x8) AsFloat64x2() (to Float64x2) // Int8x16 converts from Uint16x8 to Int8x16 func (from Uint16x8) AsInt8x16() (to Int8x16) // Int16x8 converts from Uint16x8 to Int16x8 func (from Uint16x8) AsInt16x8() (to Int16x8) // Int32x4 converts from Uint16x8 to Int32x4 func (from Uint16x8) AsInt32x4() (to Int32x4) // Int64x2 converts from Uint16x8 to Int64x2 func (from Uint16x8) AsInt64x2() (to Int64x2) // Uint8x16 converts from Uint16x8 to Uint8x16 func (from Uint16x8) AsUint8x16() (to Uint8x16) // Uint32x4 converts from Uint16x8 to Uint32x4 func (from Uint16x8) AsUint32x4() (to Uint32x4) // Uint64x2 converts from Uint16x8 to Uint64x2 func (from Uint16x8) AsUint64x2() (to Uint64x2) // Float32x8 converts from Uint16x16 to Float32x8 func (from Uint16x16) AsFloat32x8() (to Float32x8) // Float64x4 converts from Uint16x16 to Float64x4 func (from Uint16x16) AsFloat64x4() (to Float64x4) // Int8x32 converts from Uint16x16 to Int8x32 func (from Uint16x16) AsInt8x32() (to Int8x32) // Int16x16 converts from Uint16x16 to Int16x16 func (from Uint16x16) AsInt16x16() (to Int16x16) // Int32x8 converts from Uint16x16 to Int32x8 func (from Uint16x16) AsInt32x8() (to Int32x8) // Int64x4 converts from Uint16x16 to Int64x4 func (from Uint16x16) AsInt64x4() (to Int64x4) // Uint8x32 converts from Uint16x16 to Uint8x32 func (from Uint16x16) AsUint8x32() (to Uint8x32) // Uint32x8 converts from Uint16x16 to Uint32x8 func (from Uint16x16) AsUint32x8() (to Uint32x8) // Uint64x4 converts from Uint16x16 to Uint64x4 func (from Uint16x16) AsUint64x4() (to Uint64x4) // Float32x16 converts from Uint16x32 to Float32x16 func (from Uint16x32) AsFloat32x16() (to Float32x16) // Float64x8 converts from Uint16x32 to Float64x8 func (from Uint16x32) AsFloat64x8() (to Float64x8) // Int8x64 converts from Uint16x32 to Int8x64 func (from Uint16x32) AsInt8x64() (to Int8x64) // Int16x32 converts from Uint16x32 to Int16x32 func (from Uint16x32) AsInt16x32() (to Int16x32) // Int32x16 converts from Uint16x32 to Int32x16 func (from Uint16x32) AsInt32x16() (to Int32x16) // Int64x8 converts from Uint16x32 to Int64x8 func (from Uint16x32) AsInt64x8() (to Int64x8) // Uint8x64 converts from Uint16x32 to Uint8x64 func (from Uint16x32) AsUint8x64() (to Uint8x64) // Uint32x16 converts from Uint16x32 to Uint32x16 func (from Uint16x32) AsUint32x16() (to Uint32x16) // Uint64x8 converts from Uint16x32 to Uint64x8 func (from Uint16x32) AsUint64x8() (to Uint64x8) // Float32x4 converts from Uint32x4 to Float32x4 func (from Uint32x4) AsFloat32x4() (to Float32x4) // Float64x2 converts from Uint32x4 to Float64x2 func (from Uint32x4) AsFloat64x2() (to Float64x2) // Int8x16 converts from Uint32x4 to Int8x16 func (from Uint32x4) AsInt8x16() (to Int8x16) // Int16x8 converts from Uint32x4 to Int16x8 func (from Uint32x4) AsInt16x8() (to Int16x8) // Int32x4 converts from Uint32x4 to Int32x4 func (from Uint32x4) AsInt32x4() (to Int32x4) // Int64x2 converts from Uint32x4 to Int64x2 func (from Uint32x4) AsInt64x2() (to Int64x2) // Uint8x16 converts from Uint32x4 to Uint8x16 func (from Uint32x4) AsUint8x16() (to 
Uint8x16) // Uint16x8 converts from Uint32x4 to Uint16x8 func (from Uint32x4) AsUint16x8() (to Uint16x8) // Uint64x2 converts from Uint32x4 to Uint64x2 func (from Uint32x4) AsUint64x2() (to Uint64x2) // Float32x8 converts from Uint32x8 to Float32x8 func (from Uint32x8) AsFloat32x8() (to Float32x8) // Float64x4 converts from Uint32x8 to Float64x4 func (from Uint32x8) AsFloat64x4() (to Float64x4) // Int8x32 converts from Uint32x8 to Int8x32 func (from Uint32x8) AsInt8x32() (to Int8x32) // Int16x16 converts from Uint32x8 to Int16x16 func (from Uint32x8) AsInt16x16() (to Int16x16) // Int32x8 converts from Uint32x8 to Int32x8 func (from Uint32x8) AsInt32x8() (to Int32x8) // Int64x4 converts from Uint32x8 to Int64x4 func (from Uint32x8) AsInt64x4() (to Int64x4) // Uint8x32 converts from Uint32x8 to Uint8x32 func (from Uint32x8) AsUint8x32() (to Uint8x32) // Uint16x16 converts from Uint32x8 to Uint16x16 func (from Uint32x8) AsUint16x16() (to Uint16x16) // Uint64x4 converts from Uint32x8 to Uint64x4 func (from Uint32x8) AsUint64x4() (to Uint64x4) // Float32x16 converts from Uint32x16 to Float32x16 func (from Uint32x16) AsFloat32x16() (to Float32x16) // Float64x8 converts from Uint32x16 to Float64x8 func (from Uint32x16) AsFloat64x8() (to Float64x8) // Int8x64 converts from Uint32x16 to Int8x64 func (from Uint32x16) AsInt8x64() (to Int8x64) // Int16x32 converts from Uint32x16 to Int16x32 func (from Uint32x16) AsInt16x32() (to Int16x32) // Int32x16 converts from Uint32x16 to Int32x16 func (from Uint32x16) AsInt32x16() (to Int32x16) // Int64x8 converts from Uint32x16 to Int64x8 func (from Uint32x16) AsInt64x8() (to Int64x8) // Uint8x64 converts from Uint32x16 to Uint8x64 func (from Uint32x16) AsUint8x64() (to Uint8x64) // Uint16x32 converts from Uint32x16 to Uint16x32 func (from Uint32x16) AsUint16x32() (to Uint16x32) // Uint64x8 converts from Uint32x16 to Uint64x8 func (from Uint32x16) AsUint64x8() (to Uint64x8) // Float32x4 converts from Uint64x2 to Float32x4 func (from Uint64x2) AsFloat32x4() (to Float32x4) // Float64x2 converts from Uint64x2 to Float64x2 func (from Uint64x2) AsFloat64x2() (to Float64x2) // Int8x16 converts from Uint64x2 to Int8x16 func (from Uint64x2) AsInt8x16() (to Int8x16) // Int16x8 converts from Uint64x2 to Int16x8 func (from Uint64x2) AsInt16x8() (to Int16x8) // Int32x4 converts from Uint64x2 to Int32x4 func (from Uint64x2) AsInt32x4() (to Int32x4) // Int64x2 converts from Uint64x2 to Int64x2 func (from Uint64x2) AsInt64x2() (to Int64x2) // Uint8x16 converts from Uint64x2 to Uint8x16 func (from Uint64x2) AsUint8x16() (to Uint8x16) // Uint16x8 converts from Uint64x2 to Uint16x8 func (from Uint64x2) AsUint16x8() (to Uint16x8) // Uint32x4 converts from Uint64x2 to Uint32x4 func (from Uint64x2) AsUint32x4() (to Uint32x4) // Float32x8 converts from Uint64x4 to Float32x8 func (from Uint64x4) AsFloat32x8() (to Float32x8) // Float64x4 converts from Uint64x4 to Float64x4 func (from Uint64x4) AsFloat64x4() (to Float64x4) // Int8x32 converts from Uint64x4 to Int8x32 func (from Uint64x4) AsInt8x32() (to Int8x32) // Int16x16 converts from Uint64x4 to Int16x16 func (from Uint64x4) AsInt16x16() (to Int16x16) // Int32x8 converts from Uint64x4 to Int32x8 func (from Uint64x4) AsInt32x8() (to Int32x8) // Int64x4 converts from Uint64x4 to Int64x4 func (from Uint64x4) AsInt64x4() (to Int64x4) // Uint8x32 converts from Uint64x4 to Uint8x32 func (from Uint64x4) AsUint8x32() (to Uint8x32) // Uint16x16 converts from Uint64x4 to Uint16x16 func (from Uint64x4) AsUint16x16() (to Uint16x16) // Uint32x8 
// Uint32x8 converts from Uint64x4 to Uint32x8
func (from Uint64x4) AsUint32x8() (to Uint32x8)

// Float32x16 converts from Uint64x8 to Float32x16
func (from Uint64x8) AsFloat32x16() (to Float32x16)

// Float64x8 converts from Uint64x8 to Float64x8
func (from Uint64x8) AsFloat64x8() (to Float64x8)

// Int8x64 converts from Uint64x8 to Int8x64
func (from Uint64x8) AsInt8x64() (to Int8x64)

// Int16x32 converts from Uint64x8 to Int16x32
func (from Uint64x8) AsInt16x32() (to Int16x32)

// Int32x16 converts from Uint64x8 to Int32x16
func (from Uint64x8) AsInt32x16() (to Int32x16)

// Int64x8 converts from Uint64x8 to Int64x8
func (from Uint64x8) AsInt64x8() (to Int64x8)

// Uint8x64 converts from Uint64x8 to Uint8x64
func (from Uint64x8) AsUint8x64() (to Uint8x64)

// Uint16x32 converts from Uint64x8 to Uint16x32
func (from Uint64x8) AsUint16x32() (to Uint16x32)

// Uint32x16 converts from Uint64x8 to Uint32x16
func (from Uint64x8) AsUint32x16() (to Uint32x16)
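// The helper below is an illustrative sketch only, not part of the
// generated API: it assumes GOEXPERIMENT=simd and uses only the As*
// conversions declared above. Each As* pair preserves the total vector
// width (e.g. 2 x 64 bits and 4 x 32 bits are both 128 bits), so these
// conversions reinterpret the same bits at a different element width;
// they do not convert element values numerically. The helper name is
// hypothetical.
func reinterpretLanesExample(v Uint64x2) Uint32x4 {
	// View the two 64-bit lanes of v as four 32-bit lanes.
	return v.AsUint32x4()
}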
// Int8x16 converts from Mask8x16 to Int8x16
func (from Mask8x16) AsInt8x16() (to Int8x16)

// Mask8x16 converts from Int8x16 to Mask8x16
func (from Int8x16) AsMask8x16() (to Mask8x16)

// And returns the element-wise AND of masks x and y.
func (x Mask8x16) And(y Mask8x16) Mask8x16

// Or returns the element-wise OR of masks x and y.
func (x Mask8x16) Or(y Mask8x16) Mask8x16

// Int8x32 converts from Mask8x32 to Int8x32
func (from Mask8x32) AsInt8x32() (to Int8x32)

// Mask8x32 converts from Int8x32 to Mask8x32
func (from Int8x32) AsMask8x32() (to Mask8x32)

// And returns the element-wise AND of masks x and y.
func (x Mask8x32) And(y Mask8x32) Mask8x32

// Or returns the element-wise OR of masks x and y.
func (x Mask8x32) Or(y Mask8x32) Mask8x32

// Int8x64 converts from Mask8x64 to Int8x64
func (from Mask8x64) AsInt8x64() (to Int8x64)

// Mask8x64 converts from Int8x64 to Mask8x64
func (from Int8x64) AsMask8x64() (to Mask8x64)

// And returns the element-wise AND of masks x and y.
func (x Mask8x64) And(y Mask8x64) Mask8x64

// Or returns the element-wise OR of masks x and y.
func (x Mask8x64) Or(y Mask8x64) Mask8x64

// Int16x8 converts from Mask16x8 to Int16x8
func (from Mask16x8) AsInt16x8() (to Int16x8)

// Mask16x8 converts from Int16x8 to Mask16x8
func (from Int16x8) AsMask16x8() (to Mask16x8)

// And returns the element-wise AND of masks x and y.
func (x Mask16x8) And(y Mask16x8) Mask16x8

// Or returns the element-wise OR of masks x and y.
func (x Mask16x8) Or(y Mask16x8) Mask16x8

// Int16x16 converts from Mask16x16 to Int16x16
func (from Mask16x16) AsInt16x16() (to Int16x16)

// Mask16x16 converts from Int16x16 to Mask16x16
func (from Int16x16) AsMask16x16() (to Mask16x16)

// And returns the element-wise AND of masks x and y.
func (x Mask16x16) And(y Mask16x16) Mask16x16

// Or returns the element-wise OR of masks x and y.
func (x Mask16x16) Or(y Mask16x16) Mask16x16

// Int16x32 converts from Mask16x32 to Int16x32
func (from Mask16x32) AsInt16x32() (to Int16x32)

// Mask16x32 converts from Int16x32 to Mask16x32
func (from Int16x32) AsMask16x32() (to Mask16x32)

// And returns the element-wise AND of masks x and y.
func (x Mask16x32) And(y Mask16x32) Mask16x32

// Or returns the element-wise OR of masks x and y.
func (x Mask16x32) Or(y Mask16x32) Mask16x32

// Int32x4 converts from Mask32x4 to Int32x4
func (from Mask32x4) AsInt32x4() (to Int32x4)

// Mask32x4 converts from Int32x4 to Mask32x4
func (from Int32x4) AsMask32x4() (to Mask32x4)

// And returns the element-wise AND of masks x and y.
func (x Mask32x4) And(y Mask32x4) Mask32x4

// Or returns the element-wise OR of masks x and y.
func (x Mask32x4) Or(y Mask32x4) Mask32x4

// Int32x8 converts from Mask32x8 to Int32x8
func (from Mask32x8) AsInt32x8() (to Int32x8)

// Mask32x8 converts from Int32x8 to Mask32x8
func (from Int32x8) AsMask32x8() (to Mask32x8)

// And returns the element-wise AND of masks x and y.
func (x Mask32x8) And(y Mask32x8) Mask32x8

// Or returns the element-wise OR of masks x and y.
func (x Mask32x8) Or(y Mask32x8) Mask32x8

// Int32x16 converts from Mask32x16 to Int32x16
func (from Mask32x16) AsInt32x16() (to Int32x16)

// Mask32x16 converts from Int32x16 to Mask32x16
func (from Int32x16) AsMask32x16() (to Mask32x16)

// And returns the element-wise AND of masks x and y.
func (x Mask32x16) And(y Mask32x16) Mask32x16

// Or returns the element-wise OR of masks x and y.
func (x Mask32x16) Or(y Mask32x16) Mask32x16

// Int64x2 converts from Mask64x2 to Int64x2
func (from Mask64x2) AsInt64x2() (to Int64x2)

// Mask64x2 converts from Int64x2 to Mask64x2
func (from Int64x2) AsMask64x2() (to Mask64x2)

// And returns the element-wise AND of masks x and y.
func (x Mask64x2) And(y Mask64x2) Mask64x2

// Or returns the element-wise OR of masks x and y.
func (x Mask64x2) Or(y Mask64x2) Mask64x2

// Int64x4 converts from Mask64x4 to Int64x4
func (from Mask64x4) AsInt64x4() (to Int64x4)

// Mask64x4 converts from Int64x4 to Mask64x4
func (from Int64x4) AsMask64x4() (to Mask64x4)

// And returns the element-wise AND of masks x and y.
func (x Mask64x4) And(y Mask64x4) Mask64x4

// Or returns the element-wise OR of masks x and y.
func (x Mask64x4) Or(y Mask64x4) Mask64x4

// Int64x8 converts from Mask64x8 to Int64x8
func (from Mask64x8) AsInt64x8() (to Int64x8)

// Mask64x8 converts from Int64x8 to Mask64x8
func (from Int64x8) AsMask64x8() (to Mask64x8)

// And returns the element-wise AND of masks x and y.
func (x Mask64x8) And(y Mask64x8) Mask64x8

// Or returns the element-wise OR of masks x and y.
func (x Mask64x8) Or(y Mask64x8) Mask64x8
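// Likewise an illustrative sketch, not part of the generated API: it
// shows how mask values compose with And/Or and how a mask can be
// inspected through its integer vector form via AsInt32x4. The helper
// name is hypothetical.
func intersectMasksExample(a, b Mask32x4) Int32x4 {
	// Elements are set in a.And(b) only where they are set in both a
	// and b; AsInt32x4 exposes that mask as an ordinary Int32x4.
	return a.And(b).AsInt32x4()
}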