diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index bb0476fc20c..b21d58b4a44 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -1,807 +1,807 @@ // Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. -(AbsoluteInt16x16 ...) => (VPABSW256 ...) -(AbsoluteInt16x32 ...) => (VPABSW512 ...) -(AbsoluteInt16x8 ...) => (VPABSW128 ...) -(AbsoluteInt32x16 ...) => (VPABSD512 ...) -(AbsoluteInt32x4 ...) => (VPABSD128 ...) -(AbsoluteInt32x8 ...) => (VPABSD256 ...) -(AbsoluteInt64x2 ...) => (VPABSQ128 ...) -(AbsoluteInt64x4 ...) => (VPABSQ256 ...) -(AbsoluteInt64x8 ...) => (VPABSQ512 ...) (AbsoluteInt8x16 ...) => (VPABSB128 ...) (AbsoluteInt8x32 ...) => (VPABSB256 ...) (AbsoluteInt8x64 ...) => (VPABSB512 ...) -(AddFloat32x16 ...) => (VADDPS512 ...) +(AbsoluteInt16x8 ...) => (VPABSW128 ...) +(AbsoluteInt16x16 ...) => (VPABSW256 ...) +(AbsoluteInt16x32 ...) => (VPABSW512 ...) +(AbsoluteInt32x4 ...) => (VPABSD128 ...) +(AbsoluteInt32x8 ...) => (VPABSD256 ...) +(AbsoluteInt32x16 ...) => (VPABSD512 ...) +(AbsoluteInt64x2 ...) => (VPABSQ128 ...) +(AbsoluteInt64x4 ...) => (VPABSQ256 ...) +(AbsoluteInt64x8 ...) => (VPABSQ512 ...) (AddFloat32x4 ...) => (VADDPS128 ...) (AddFloat32x8 ...) => (VADDPS256 ...) +(AddFloat32x16 ...) => (VADDPS512 ...) (AddFloat64x2 ...) => (VADDPD128 ...) (AddFloat64x4 ...) => (VADDPD256 ...) (AddFloat64x8 ...) => (VADDPD512 ...) -(AddInt16x16 ...) => (VPADDW256 ...) -(AddInt16x32 ...) => (VPADDW512 ...) -(AddInt16x8 ...) => (VPADDW128 ...) -(AddInt32x16 ...) => (VPADDD512 ...) -(AddInt32x4 ...) => (VPADDD128 ...) -(AddInt32x8 ...) => (VPADDD256 ...) -(AddInt64x2 ...) => (VPADDQ128 ...) -(AddInt64x4 ...) => (VPADDQ256 ...) -(AddInt64x8 ...) => (VPADDQ512 ...) (AddInt8x16 ...) => (VPADDB128 ...) (AddInt8x32 ...) => (VPADDB256 ...) (AddInt8x64 ...) => (VPADDB512 ...) -(AddUint16x16 ...) => (VPADDW256 ...) -(AddUint16x32 ...) => (VPADDW512 ...) -(AddUint16x8 ...) => (VPADDW128 ...) -(AddUint32x16 ...) => (VPADDD512 ...) -(AddUint32x4 ...) => (VPADDD128 ...) -(AddUint32x8 ...) => (VPADDD256 ...) -(AddUint64x2 ...) => (VPADDQ128 ...) -(AddUint64x4 ...) => (VPADDQ256 ...) -(AddUint64x8 ...) => (VPADDQ512 ...) +(AddInt16x8 ...) => (VPADDW128 ...) +(AddInt16x16 ...) => (VPADDW256 ...) +(AddInt16x32 ...) => (VPADDW512 ...) +(AddInt32x4 ...) => (VPADDD128 ...) +(AddInt32x8 ...) => (VPADDD256 ...) +(AddInt32x16 ...) => (VPADDD512 ...) +(AddInt64x2 ...) => (VPADDQ128 ...) +(AddInt64x4 ...) => (VPADDQ256 ...) +(AddInt64x8 ...) => (VPADDQ512 ...) (AddUint8x16 ...) => (VPADDB128 ...) (AddUint8x32 ...) => (VPADDB256 ...) (AddUint8x64 ...) => (VPADDB512 ...) +(AddUint16x8 ...) => (VPADDW128 ...) +(AddUint16x16 ...) => (VPADDW256 ...) +(AddUint16x32 ...) => (VPADDW512 ...) +(AddUint32x4 ...) => (VPADDD128 ...) +(AddUint32x8 ...) => (VPADDD256 ...) +(AddUint32x16 ...) => (VPADDD512 ...) +(AddUint64x2 ...) => (VPADDQ128 ...) +(AddUint64x4 ...) => (VPADDQ256 ...) +(AddUint64x8 ...) => (VPADDQ512 ...) (AddSubFloat32x4 ...) => (VADDSUBPS128 ...) (AddSubFloat32x8 ...) => (VADDSUBPS256 ...) (AddSubFloat64x2 ...) => (VADDSUBPD128 ...) (AddSubFloat64x4 ...) => (VADDSUBPD256 ...) -(AndFloat32x16 ...) => (VANDPS512 ...) (AndFloat32x4 ...) => (VANDPS128 ...) (AndFloat32x8 ...) => (VANDPS256 ...) +(AndFloat32x16 ...) => (VANDPS512 ...) (AndFloat64x2 ...) => (VANDPD128 ...) 
(AndFloat64x4 ...) => (VANDPD256 ...) (AndFloat64x8 ...) => (VANDPD512 ...) -(AndInt16x16 ...) => (VPAND256 ...) +(AndInt8x16 ...) => (VPAND128 ...) +(AndInt8x32 ...) => (VPAND256 ...) (AndInt16x8 ...) => (VPAND128 ...) -(AndInt32x16 ...) => (VPANDD512 ...) +(AndInt16x16 ...) => (VPAND256 ...) (AndInt32x4 ...) => (VPAND128 ...) (AndInt32x8 ...) => (VPAND256 ...) +(AndInt32x16 ...) => (VPANDD512 ...) (AndInt64x2 ...) => (VPAND128 ...) (AndInt64x4 ...) => (VPAND256 ...) (AndInt64x8 ...) => (VPANDQ512 ...) -(AndInt8x16 ...) => (VPAND128 ...) -(AndInt8x32 ...) => (VPAND256 ...) -(AndUint16x16 ...) => (VPAND256 ...) +(AndUint8x16 ...) => (VPAND128 ...) +(AndUint8x32 ...) => (VPAND256 ...) (AndUint16x8 ...) => (VPAND128 ...) -(AndUint32x16 ...) => (VPANDD512 ...) +(AndUint16x16 ...) => (VPAND256 ...) (AndUint32x4 ...) => (VPAND128 ...) (AndUint32x8 ...) => (VPAND256 ...) +(AndUint32x16 ...) => (VPANDD512 ...) (AndUint64x2 ...) => (VPAND128 ...) (AndUint64x4 ...) => (VPAND256 ...) (AndUint64x8 ...) => (VPANDQ512 ...) -(AndUint8x16 ...) => (VPAND128 ...) -(AndUint8x32 ...) => (VPAND256 ...) -(AndNotFloat32x16 ...) => (VANDNPS512 ...) (AndNotFloat32x4 ...) => (VANDNPS128 ...) (AndNotFloat32x8 ...) => (VANDNPS256 ...) +(AndNotFloat32x16 ...) => (VANDNPS512 ...) (AndNotFloat64x2 ...) => (VANDNPD128 ...) (AndNotFloat64x4 ...) => (VANDNPD256 ...) (AndNotFloat64x8 ...) => (VANDNPD512 ...) -(AndNotInt16x16 ...) => (VPANDN256 ...) +(AndNotInt8x16 ...) => (VPANDN128 ...) +(AndNotInt8x32 ...) => (VPANDN256 ...) (AndNotInt16x8 ...) => (VPANDN128 ...) -(AndNotInt32x16 ...) => (VPANDND512 ...) +(AndNotInt16x16 ...) => (VPANDN256 ...) (AndNotInt32x4 ...) => (VPANDN128 ...) (AndNotInt32x8 ...) => (VPANDN256 ...) +(AndNotInt32x16 ...) => (VPANDND512 ...) (AndNotInt64x2 ...) => (VPANDN128 ...) (AndNotInt64x4 ...) => (VPANDN256 ...) (AndNotInt64x8 ...) => (VPANDNQ512 ...) -(AndNotInt8x16 ...) => (VPANDN128 ...) -(AndNotInt8x32 ...) => (VPANDN256 ...) -(AndNotUint16x16 ...) => (VPANDN256 ...) +(AndNotUint8x16 ...) => (VPANDN128 ...) +(AndNotUint8x32 ...) => (VPANDN256 ...) (AndNotUint16x8 ...) => (VPANDN128 ...) -(AndNotUint32x16 ...) => (VPANDND512 ...) +(AndNotUint16x16 ...) => (VPANDN256 ...) (AndNotUint32x4 ...) => (VPANDN128 ...) (AndNotUint32x8 ...) => (VPANDN256 ...) +(AndNotUint32x16 ...) => (VPANDND512 ...) (AndNotUint64x2 ...) => (VPANDN128 ...) (AndNotUint64x4 ...) => (VPANDN256 ...) (AndNotUint64x8 ...) => (VPANDNQ512 ...) -(AndNotUint8x16 ...) => (VPANDN128 ...) -(AndNotUint8x32 ...) => (VPANDN256 ...) -(ApproximateReciprocalFloat32x16 ...) => (VRCP14PS512 ...) (ApproximateReciprocalFloat32x4 ...) => (VRCP14PS128 ...) (ApproximateReciprocalFloat32x8 ...) => (VRCP14PS256 ...) +(ApproximateReciprocalFloat32x16 ...) => (VRCP14PS512 ...) (ApproximateReciprocalFloat64x2 ...) => (VRCP14PD128 ...) (ApproximateReciprocalFloat64x4 ...) => (VRCP14PD256 ...) (ApproximateReciprocalFloat64x8 ...) => (VRCP14PD512 ...) -(ApproximateReciprocalOfSqrtFloat32x16 ...) => (VRSQRT14PS512 ...) (ApproximateReciprocalOfSqrtFloat32x4 ...) => (VRSQRTPS128 ...) (ApproximateReciprocalOfSqrtFloat32x8 ...) => (VRSQRTPS256 ...) +(ApproximateReciprocalOfSqrtFloat32x16 ...) => (VRSQRT14PS512 ...) (ApproximateReciprocalOfSqrtFloat64x2 ...) => (VRSQRT14PD128 ...) (ApproximateReciprocalOfSqrtFloat64x4 ...) => (VRSQRT14PD256 ...) (ApproximateReciprocalOfSqrtFloat64x8 ...) => (VRSQRT14PD512 ...) -(AverageUint16x16 ...) => (VPAVGW256 ...) -(AverageUint16x32 ...) => (VPAVGW512 ...) -(AverageUint16x8 ...) => (VPAVGW128 ...) 
(AverageUint8x16 ...) => (VPAVGB128 ...) (AverageUint8x32 ...) => (VPAVGB256 ...) (AverageUint8x64 ...) => (VPAVGB512 ...) +(AverageUint16x8 ...) => (VPAVGW128 ...) +(AverageUint16x16 ...) => (VPAVGW256 ...) +(AverageUint16x32 ...) => (VPAVGW512 ...) (CeilFloat32x4 x) => (VROUNDPS128 [2] x) (CeilFloat32x8 x) => (VROUNDPS256 [2] x) (CeilFloat64x2 x) => (VROUNDPD128 [2] x) (CeilFloat64x4 x) => (VROUNDPD256 [2] x) -(CeilSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+10] x) (CeilSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+10] x) (CeilSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+10] x) +(CeilSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+10] x) (CeilSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+10] x) (CeilSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+10] x) (CeilSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+10] x) -(CeilWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+2] x) (CeilWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+2] x) (CeilWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+2] x) +(CeilWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+2] x) (CeilWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x) (CeilWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x) (CeilWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x) -(DiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+10] x) (DiffWithCeilSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+10] x) (DiffWithCeilSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+10] x) +(DiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+10] x) (DiffWithCeilSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+10] x) (DiffWithCeilSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+10] x) (DiffWithCeilSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+10] x) -(DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x) (DiffWithCeilWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x) (DiffWithCeilWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x) +(DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x) (DiffWithCeilWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x) (DiffWithCeilWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x) (DiffWithCeilWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x) -(DiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+9] x) (DiffWithFloorSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+9] x) (DiffWithFloorSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+9] x) +(DiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+9] x) (DiffWithFloorSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+9] x) (DiffWithFloorSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+9] x) (DiffWithFloorSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+9] x) -(DiffWithFloorWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x) (DiffWithFloorWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x) (DiffWithFloorWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x) +(DiffWithFloorWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x) (DiffWithFloorWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x) (DiffWithFloorWithPrecisionFloat64x4 [a] x) => 
(VREDUCEPD256 [a+1] x) (DiffWithFloorWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x) -(DiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+8] x) (DiffWithRoundSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+8] x) (DiffWithRoundSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+8] x) +(DiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+8] x) (DiffWithRoundSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+8] x) (DiffWithRoundSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+8] x) (DiffWithRoundSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+8] x) -(DiffWithRoundWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x) (DiffWithRoundWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x) (DiffWithRoundWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x) +(DiffWithRoundWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x) (DiffWithRoundWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x) (DiffWithRoundWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x) (DiffWithRoundWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x) -(DiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+11] x) (DiffWithTruncSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+11] x) (DiffWithTruncSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+11] x) +(DiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+11] x) (DiffWithTruncSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+11] x) (DiffWithTruncSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+11] x) (DiffWithTruncSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+11] x) -(DiffWithTruncWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x) (DiffWithTruncWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x) (DiffWithTruncWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x) +(DiffWithTruncWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x) (DiffWithTruncWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x) (DiffWithTruncWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x) (DiffWithTruncWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x) -(DivFloat32x16 ...) => (VDIVPS512 ...) (DivFloat32x4 ...) => (VDIVPS128 ...) (DivFloat32x8 ...) => (VDIVPS256 ...) +(DivFloat32x16 ...) => (VDIVPS512 ...) (DivFloat64x2 ...) => (VDIVPD128 ...) (DivFloat64x4 ...) => (VDIVPD256 ...) (DivFloat64x8 ...) => (VDIVPD512 ...) (DotProdBroadcastFloat64x2 x y) => (VDPPD128 [127] x y) -(EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) (EqualFloat32x4 x y) => (VCMPPS128 [0] x y) (EqualFloat32x8 x y) => (VCMPPS256 [0] x y) +(EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) (EqualFloat64x2 x y) => (VCMPPD128 [0] x y) (EqualFloat64x4 x y) => (VCMPPD256 [0] x y) (EqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [0] x y)) -(EqualInt16x16 ...) => (VPCMPEQW256 ...) -(EqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [0] x y)) -(EqualInt16x8 ...) => (VPCMPEQW128 ...) -(EqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [0] x y)) -(EqualInt32x4 ...) => (VPCMPEQD128 ...) -(EqualInt32x8 ...) => (VPCMPEQD256 ...) -(EqualInt64x2 ...) => (VPCMPEQQ128 ...) -(EqualInt64x4 ...) => (VPCMPEQQ256 ...) -(EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [0] x y)) (EqualInt8x16 ...) => (VPCMPEQB128 ...) (EqualInt8x32 ...) => (VPCMPEQB256 ...) 
(EqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [0] x y)) -(EqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [0] x y)) -(EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [0] x y)) -(EqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [0] x y)) -(EqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [0] x y)) -(EqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [0] x y)) -(EqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [0] x y)) -(EqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y)) -(EqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y)) -(EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y)) +(EqualInt16x8 ...) => (VPCMPEQW128 ...) +(EqualInt16x16 ...) => (VPCMPEQW256 ...) +(EqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [0] x y)) +(EqualInt32x4 ...) => (VPCMPEQD128 ...) +(EqualInt32x8 ...) => (VPCMPEQD256 ...) +(EqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [0] x y)) +(EqualInt64x2 ...) => (VPCMPEQQ128 ...) +(EqualInt64x4 ...) => (VPCMPEQQ256 ...) +(EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [0] x y)) (EqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [0] x y)) (EqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [0] x y)) (EqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [0] x y)) +(EqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [0] x y)) +(EqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [0] x y)) +(EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [0] x y)) +(EqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [0] x y)) +(EqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [0] x y)) +(EqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [0] x y)) +(EqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y)) +(EqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y)) +(EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y)) (FloorFloat32x4 x) => (VROUNDPS128 [1] x) (FloorFloat32x8 x) => (VROUNDPS256 [1] x) (FloorFloat64x2 x) => (VROUNDPD128 [1] x) (FloorFloat64x4 x) => (VROUNDPD256 [1] x) -(FloorSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+9] x) (FloorSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+9] x) (FloorSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+9] x) +(FloorSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+9] x) (FloorSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+9] x) (FloorSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+9] x) (FloorSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+9] x) -(FloorWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+1] x) (FloorWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+1] x) (FloorWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+1] x) +(FloorWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+1] x) (FloorWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x) (FloorWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x) (FloorWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+1] x) -(FusedMultiplyAddFloat32x16 ...) => (VFMADD213PS512 ...) (FusedMultiplyAddFloat32x4 ...) => (VFMADD213PS128 ...) (FusedMultiplyAddFloat32x8 ...) => (VFMADD213PS256 ...) +(FusedMultiplyAddFloat32x16 ...) => (VFMADD213PS512 ...) (FusedMultiplyAddFloat64x2 ...) => (VFMADD213PD128 ...) (FusedMultiplyAddFloat64x4 ...) => (VFMADD213PD256 ...) (FusedMultiplyAddFloat64x8 ...) => (VFMADD213PD512 ...) -(FusedMultiplyAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...) (FusedMultiplyAddSubFloat32x4 ...) => (VFMADDSUB213PS128 ...) 
(FusedMultiplyAddSubFloat32x8 ...) => (VFMADDSUB213PS256 ...) +(FusedMultiplyAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...) (FusedMultiplyAddSubFloat64x2 ...) => (VFMADDSUB213PD128 ...) (FusedMultiplyAddSubFloat64x4 ...) => (VFMADDSUB213PD256 ...) (FusedMultiplyAddSubFloat64x8 ...) => (VFMADDSUB213PD512 ...) -(FusedMultiplySubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...) (FusedMultiplySubAddFloat32x4 ...) => (VFMSUBADD213PS128 ...) (FusedMultiplySubAddFloat32x8 ...) => (VFMSUBADD213PS256 ...) +(FusedMultiplySubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...) (FusedMultiplySubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...) (FusedMultiplySubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...) (FusedMultiplySubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...) +(GetElemInt8x16 [a] x) => (VPEXTRB128 [a] x) (GetElemInt16x8 [a] x) => (VPEXTRW128 [a] x) (GetElemInt32x4 [a] x) => (VPEXTRD128 [a] x) (GetElemInt64x2 [a] x) => (VPEXTRQ128 [a] x) -(GetElemInt8x16 [a] x) => (VPEXTRB128 [a] x) +(GetElemUint8x16 [a] x) => (VPEXTRB128 [a] x) (GetElemUint16x8 [a] x) => (VPEXTRW128 [a] x) (GetElemUint32x4 [a] x) => (VPEXTRD128 [a] x) (GetElemUint64x2 [a] x) => (VPEXTRQ128 [a] x) -(GetElemUint8x16 [a] x) => (VPEXTRB128 [a] x) -(GreaterFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [6] x y)) (GreaterFloat32x4 x y) => (VCMPPS128 [6] x y) (GreaterFloat32x8 x y) => (VCMPPS256 [6] x y) +(GreaterFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [6] x y)) (GreaterFloat64x2 x y) => (VCMPPD128 [6] x y) (GreaterFloat64x4 x y) => (VCMPPD256 [6] x y) (GreaterFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [6] x y)) -(GreaterInt16x16 ...) => (VPCMPGTW256 ...) -(GreaterInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [6] x y)) -(GreaterInt16x8 ...) => (VPCMPGTW128 ...) -(GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [6] x y)) -(GreaterInt32x4 ...) => (VPCMPGTD128 ...) -(GreaterInt32x8 ...) => (VPCMPGTD256 ...) -(GreaterInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [6] x y)) -(GreaterInt64x4 ...) => (VPCMPGTQ256 ...) -(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [6] x y)) (GreaterInt8x16 ...) => (VPCMPGTB128 ...) (GreaterInt8x32 ...) => (VPCMPGTB256 ...) (GreaterInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [6] x y)) -(GreaterUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [6] x y)) -(GreaterUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [6] x y)) -(GreaterUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [6] x y)) -(GreaterUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [6] x y)) -(GreaterUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [6] x y)) -(GreaterUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [6] x y)) -(GreaterUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [6] x y)) -(GreaterUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [6] x y)) -(GreaterUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [6] x y)) +(GreaterInt16x8 ...) => (VPCMPGTW128 ...) +(GreaterInt16x16 ...) => (VPCMPGTW256 ...) +(GreaterInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [6] x y)) +(GreaterInt32x4 ...) => (VPCMPGTD128 ...) +(GreaterInt32x8 ...) => (VPCMPGTD256 ...) +(GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [6] x y)) +(GreaterInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [6] x y)) +(GreaterInt64x4 ...) => (VPCMPGTQ256 ...) 
+(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [6] x y)) (GreaterUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [6] x y)) (GreaterUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [6] x y)) (GreaterUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [6] x y)) -(GreaterEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [5] x y)) +(GreaterUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [6] x y)) +(GreaterUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [6] x y)) +(GreaterUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [6] x y)) +(GreaterUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [6] x y)) +(GreaterUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [6] x y)) +(GreaterUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [6] x y)) +(GreaterUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [6] x y)) +(GreaterUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [6] x y)) +(GreaterUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [6] x y)) (GreaterEqualFloat32x4 x y) => (VCMPPS128 [5] x y) (GreaterEqualFloat32x8 x y) => (VCMPPS256 [5] x y) +(GreaterEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [5] x y)) (GreaterEqualFloat64x2 x y) => (VCMPPD128 [5] x y) (GreaterEqualFloat64x4 x y) => (VCMPPD256 [5] x y) (GreaterEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [5] x y)) -(GreaterEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [5] x y)) -(GreaterEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [5] x y)) -(GreaterEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [5] x y)) -(GreaterEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [5] x y)) -(GreaterEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [5] x y)) -(GreaterEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [5] x y)) -(GreaterEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [5] x y)) -(GreaterEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [5] x y)) -(GreaterEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [5] x y)) (GreaterEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [5] x y)) (GreaterEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [5] x y)) (GreaterEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [5] x y)) -(GreaterEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [5] x y)) -(GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [5] x y)) -(GreaterEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [5] x y)) -(GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [5] x y)) -(GreaterEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [5] x y)) -(GreaterEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [5] x y)) -(GreaterEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [5] x y)) -(GreaterEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [5] x y)) -(GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [5] x y)) +(GreaterEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [5] x y)) +(GreaterEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [5] x y)) +(GreaterEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [5] x y)) +(GreaterEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [5] x y)) +(GreaterEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [5] x y)) +(GreaterEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [5] x y)) +(GreaterEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [5] x y)) +(GreaterEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [5] x y)) +(GreaterEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [5] x y)) (GreaterEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [5] x y)) (GreaterEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [5] x y)) (GreaterEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [5] x y)) 
-(IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) +(GreaterEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [5] x y)) +(GreaterEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [5] x y)) +(GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [5] x y)) +(GreaterEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [5] x y)) +(GreaterEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [5] x y)) +(GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [5] x y)) +(GreaterEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [5] x y)) +(GreaterEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [5] x y)) +(GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [5] x y)) (IsNanFloat32x4 x y) => (VCMPPS128 [3] x y) (IsNanFloat32x8 x y) => (VCMPPS256 [3] x y) +(IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) (IsNanFloat64x2 x y) => (VCMPPD128 [3] x y) (IsNanFloat64x4 x y) => (VCMPPD256 [3] x y) (IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) -(LessFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [1] x y)) (LessFloat32x4 x y) => (VCMPPS128 [1] x y) (LessFloat32x8 x y) => (VCMPPS256 [1] x y) +(LessFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [1] x y)) (LessFloat64x2 x y) => (VCMPPD128 [1] x y) (LessFloat64x4 x y) => (VCMPPD256 [1] x y) (LessFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [1] x y)) -(LessInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [1] x y)) -(LessInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [1] x y)) -(LessInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [1] x y)) -(LessInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [1] x y)) -(LessInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [1] x y)) -(LessInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [1] x y)) -(LessInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [1] x y)) -(LessInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [1] x y)) -(LessInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [1] x y)) (LessInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [1] x y)) (LessInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [1] x y)) (LessInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [1] x y)) -(LessUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [1] x y)) -(LessUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [1] x y)) -(LessUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [1] x y)) -(LessUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [1] x y)) -(LessUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [1] x y)) -(LessUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [1] x y)) -(LessUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [1] x y)) -(LessUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [1] x y)) -(LessUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y)) +(LessInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [1] x y)) +(LessInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [1] x y)) +(LessInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [1] x y)) +(LessInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [1] x y)) +(LessInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [1] x y)) +(LessInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [1] x y)) +(LessInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [1] x y)) +(LessInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [1] x y)) +(LessInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [1] x y)) (LessUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [1] x y)) (LessUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [1] x y)) (LessUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [1] x y)) -(LessEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [2] x y)) +(LessUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [1] x y)) +(LessUint16x16 x y) => (VPMOVMToVec16x16 
(VPCMPUW256 [1] x y)) +(LessUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [1] x y)) +(LessUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [1] x y)) +(LessUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [1] x y)) +(LessUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [1] x y)) +(LessUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [1] x y)) +(LessUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [1] x y)) +(LessUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y)) (LessEqualFloat32x4 x y) => (VCMPPS128 [2] x y) (LessEqualFloat32x8 x y) => (VCMPPS256 [2] x y) +(LessEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [2] x y)) (LessEqualFloat64x2 x y) => (VCMPPD128 [2] x y) (LessEqualFloat64x4 x y) => (VCMPPD256 [2] x y) (LessEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [2] x y)) -(LessEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [2] x y)) -(LessEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [2] x y)) -(LessEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [2] x y)) -(LessEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [2] x y)) -(LessEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [2] x y)) -(LessEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [2] x y)) -(LessEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [2] x y)) -(LessEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [2] x y)) -(LessEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [2] x y)) (LessEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [2] x y)) (LessEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [2] x y)) (LessEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [2] x y)) -(LessEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [2] x y)) -(LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) -(LessEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [2] x y)) -(LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) -(LessEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [2] x y)) -(LessEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [2] x y)) -(LessEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y)) -(LessEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y)) -(LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) +(LessEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [2] x y)) +(LessEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [2] x y)) +(LessEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [2] x y)) +(LessEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [2] x y)) +(LessEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [2] x y)) +(LessEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [2] x y)) +(LessEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [2] x y)) +(LessEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [2] x y)) +(LessEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [2] x y)) (LessEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [2] x y)) (LessEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [2] x y)) (LessEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [2] x y)) -(MaskedAbsoluteInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM mask)) -(MaskedAbsoluteInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM mask)) -(MaskedAbsoluteInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM mask)) -(MaskedAbsoluteInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM mask)) -(MaskedAbsoluteInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM mask)) -(MaskedAbsoluteInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM mask)) -(MaskedAbsoluteInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM mask)) -(MaskedAbsoluteInt64x4 x mask) => 
(VPABSQMasked256 x (VPMOVVec64x4ToM mask)) -(MaskedAbsoluteInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM mask)) +(LessEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [2] x y)) +(LessEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [2] x y)) +(LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) +(LessEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [2] x y)) +(LessEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [2] x y)) +(LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) +(LessEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y)) +(LessEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y)) +(LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) (MaskedAbsoluteInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM mask)) (MaskedAbsoluteInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM mask)) (MaskedAbsoluteInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM mask)) -(MaskedAddFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedAbsoluteInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM mask)) +(MaskedAbsoluteInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM mask)) +(MaskedAbsoluteInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM mask)) +(MaskedAbsoluteInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM mask)) +(MaskedAbsoluteInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM mask)) +(MaskedAbsoluteInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM mask)) +(MaskedAbsoluteInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM mask)) +(MaskedAbsoluteInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM mask)) +(MaskedAbsoluteInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM mask)) (MaskedAddFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM mask)) (MaskedAddFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedAddFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAddFloat64x2 x y mask) => (VADDPDMasked128 x y (VPMOVVec64x2ToM mask)) (MaskedAddFloat64x4 x y mask) => (VADDPDMasked256 x y (VPMOVVec64x4ToM mask)) (MaskedAddFloat64x8 x y mask) => (VADDPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAddInt16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedAddInt16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedAddInt16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedAddInt32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedAddInt32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedAddInt32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedAddInt64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedAddInt64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedAddInt64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) (MaskedAddInt8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) (MaskedAddInt8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) (MaskedAddInt8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedAddUint16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedAddUint16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedAddUint16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedAddUint32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedAddUint32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) 
-(MaskedAddUint32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedAddUint64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedAddUint64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedAddUint64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedAddInt16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedAddInt16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedAddInt16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaskedAddInt32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedAddInt32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedAddInt32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedAddInt64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedAddInt64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedAddInt64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) (MaskedAddUint8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) (MaskedAddUint8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) (MaskedAddUint8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedAndFloat32x16 x y mask) => (VANDPSMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedAddUint16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedAddUint16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedAddUint16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaskedAddUint32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedAddUint32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedAddUint32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedAddUint64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedAddUint64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedAddUint64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) (MaskedAndFloat32x4 x y mask) => (VANDPSMasked128 x y (VPMOVVec32x4ToM mask)) (MaskedAndFloat32x8 x y mask) => (VANDPSMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedAndFloat32x16 x y mask) => (VANDPSMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndFloat64x2 x y mask) => (VANDPDMasked128 x y (VPMOVVec64x2ToM mask)) (MaskedAndFloat64x4 x y mask) => (VANDPDMasked256 x y (VPMOVVec64x4ToM mask)) (MaskedAndFloat64x8 x y mask) => (VANDPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAndInt32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndInt32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) (MaskedAndInt32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedAndInt32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndInt64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) (MaskedAndInt64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) (MaskedAndInt64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAndUint32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndUint32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) (MaskedAndUint32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedAndUint32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndUint64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) (MaskedAndUint64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) (MaskedAndUint64x8 x y mask) => 
(VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAndNotFloat32x16 x y mask) => (VANDNPSMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndNotFloat32x4 x y mask) => (VANDNPSMasked128 x y (VPMOVVec32x4ToM mask)) (MaskedAndNotFloat32x8 x y mask) => (VANDNPSMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedAndNotFloat32x16 x y mask) => (VANDNPSMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndNotFloat64x2 x y mask) => (VANDNPDMasked128 x y (VPMOVVec64x2ToM mask)) (MaskedAndNotFloat64x4 x y mask) => (VANDNPDMasked256 x y (VPMOVVec64x4ToM mask)) (MaskedAndNotFloat64x8 x y mask) => (VANDNPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAndNotInt32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndNotInt32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) (MaskedAndNotInt32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedAndNotInt32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndNotInt64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) (MaskedAndNotInt64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) (MaskedAndNotInt64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAndNotUint32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndNotUint32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) (MaskedAndNotUint32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedAndNotUint32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedAndNotUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) (MaskedAndNotUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) (MaskedAndNotUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedApproximateReciprocalFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM mask)) (MaskedApproximateReciprocalFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM mask)) (MaskedApproximateReciprocalFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM mask)) +(MaskedApproximateReciprocalFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM mask)) (MaskedApproximateReciprocalFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM mask)) (MaskedApproximateReciprocalFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM mask)) (MaskedApproximateReciprocalFloat64x8 x mask) => (VRCP14PDMasked512 x (VPMOVVec64x8ToM mask)) -(MaskedApproximateReciprocalOfSqrtFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM mask)) (MaskedApproximateReciprocalOfSqrtFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM mask)) (MaskedApproximateReciprocalOfSqrtFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM mask)) +(MaskedApproximateReciprocalOfSqrtFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM mask)) (MaskedApproximateReciprocalOfSqrtFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM mask)) (MaskedApproximateReciprocalOfSqrtFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM mask)) (MaskedApproximateReciprocalOfSqrtFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM mask)) -(MaskedAverageUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedAverageUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedAverageUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM mask)) (MaskedAverageUint8x16 x y mask) => (VPAVGBMasked128 x y (VPMOVVec8x16ToM mask)) (MaskedAverageUint8x32 x y mask) => (VPAVGBMasked256 x y (VPMOVVec8x32ToM 
mask)) (MaskedAverageUint8x64 x y mask) => (VPAVGBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+10] x (VPMOVVec32x16ToM mask)) +(MaskedAverageUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedAverageUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedAverageUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM mask)) (MaskedCeilSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+10] x (VPMOVVec32x4ToM mask)) (MaskedCeilSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+10] x (VPMOVVec32x8ToM mask)) +(MaskedCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+10] x (VPMOVVec32x16ToM mask)) (MaskedCeilSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+10] x (VPMOVVec64x2ToM mask)) (MaskedCeilSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+10] x (VPMOVVec64x4ToM mask)) (MaskedCeilSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+10] x (VPMOVVec64x8ToM mask)) -(MaskedCeilWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) (MaskedCeilWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) (MaskedCeilWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) +(MaskedCeilWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) (MaskedCeilWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) (MaskedCeilWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) (MaskedCeilWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+10] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+10] x (VPMOVVec32x4ToM mask)) (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+10] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+10] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+10] x (VPMOVVec64x2ToM mask)) (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+10] x (VPMOVVec64x4ToM mask)) (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+10] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithCeilWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithCeilWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) (MaskedDiffWithCeilWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithCeilWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithCeilWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) (MaskedDiffWithCeilWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) (MaskedDiffWithCeilWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 
[a+2] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+9] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+9] x (VPMOVVec32x4ToM mask)) (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+9] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+9] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+9] x (VPMOVVec64x2ToM mask)) (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+9] x (VPMOVVec64x4ToM mask)) (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+9] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithFloorWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithFloorWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) (MaskedDiffWithFloorWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithFloorWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithFloorWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) (MaskedDiffWithFloorWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) (MaskedDiffWithFloorWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+8] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+8] x (VPMOVVec32x4ToM mask)) (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+8] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+8] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+8] x (VPMOVVec64x2ToM mask)) (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+8] x (VPMOVVec64x4ToM mask)) (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+8] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithRoundWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithRoundWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) (MaskedDiffWithRoundWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithRoundWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithRoundWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) (MaskedDiffWithRoundWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) (MaskedDiffWithRoundWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+11] x (VPMOVVec32x16ToM mask)) 
(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+11] x (VPMOVVec32x4ToM mask)) (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+11] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+11] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+11] x (VPMOVVec64x2ToM mask)) (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+11] x (VPMOVVec64x4ToM mask)) (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+11] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithTruncWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithTruncWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) (MaskedDiffWithTruncWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithTruncWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) (MaskedDiffWithTruncWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) (MaskedDiffWithTruncWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) (MaskedDiffWithTruncWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) -(MaskedDivFloat32x16 x y mask) => (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedDivFloat32x4 x y mask) => (VDIVPSMasked128 x y (VPMOVVec32x4ToM mask)) (MaskedDivFloat32x8 x y mask) => (VDIVPSMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedDivFloat32x16 x y mask) => (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedDivFloat64x2 x y mask) => (VDIVPDMasked128 x y (VPMOVVec64x2ToM mask)) (MaskedDivFloat64x4 x y mask) => (VDIVPDMasked256 x y (VPMOVVec64x4ToM mask)) (MaskedDivFloat64x8 x y mask) => (VDIVPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM mask))) (MaskedEqualFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM mask))) (MaskedEqualFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM mask))) +(MaskedEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM mask))) (MaskedEqualFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM mask))) (MaskedEqualFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM mask))) (MaskedEqualFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM mask))) -(MaskedEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) -(MaskedEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) -(MaskedEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) -(MaskedEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(MaskedEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(MaskedEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(MaskedEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) 
-(MaskedEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(MaskedEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) (MaskedEqualInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask))) (MaskedEqualInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask))) (MaskedEqualInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask))) -(MaskedEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask))) -(MaskedEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask))) -(MaskedEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask))) -(MaskedEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(MaskedEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(MaskedEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(MaskedEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM mask))) -(MaskedEqualUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(MaskedEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM mask))) +(MaskedEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) +(MaskedEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) +(MaskedEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) +(MaskedEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) +(MaskedEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) +(MaskedEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) +(MaskedEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) +(MaskedEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) +(MaskedEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) (MaskedEqualUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM mask))) (MaskedEqualUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM mask))) (MaskedEqualUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM mask))) -(MaskedFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+9] x (VPMOVVec32x16ToM mask)) +(MaskedEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask))) +(MaskedEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask))) +(MaskedEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask))) +(MaskedEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM mask))) +(MaskedEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM mask))) +(MaskedEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM mask))) +(MaskedEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM mask))) +(MaskedEqualUint64x4 x y mask) => 
(VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM mask))) +(MaskedEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM mask))) (MaskedFloorSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+9] x (VPMOVVec32x4ToM mask)) (MaskedFloorSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+9] x (VPMOVVec32x8ToM mask)) +(MaskedFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+9] x (VPMOVVec32x16ToM mask)) (MaskedFloorSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+9] x (VPMOVVec64x2ToM mask)) (MaskedFloorSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+9] x (VPMOVVec64x4ToM mask)) (MaskedFloorSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+9] x (VPMOVVec64x8ToM mask)) -(MaskedFloorWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) (MaskedFloorWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) (MaskedFloorWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) +(MaskedFloorWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) (MaskedFloorWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) (MaskedFloorWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) (MaskedFloorWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) -(MaskedFusedMultiplyAddFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) (MaskedFusedMultiplyAddFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) (MaskedFusedMultiplyAddFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedFusedMultiplyAddFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) (MaskedFusedMultiplyAddFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) (MaskedFusedMultiplyAddFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) (MaskedFusedMultiplyAddFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) -(MaskedFusedMultiplyAddSubFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM mask)) (MaskedFusedMultiplyAddSubFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM mask)) (MaskedFusedMultiplyAddSubFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedFusedMultiplyAddSubFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM mask)) (MaskedFusedMultiplyAddSubFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM mask)) (MaskedFusedMultiplyAddSubFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM mask)) (MaskedFusedMultiplyAddSubFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM mask)) -(MaskedFusedMultiplySubAddFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) (MaskedFusedMultiplySubAddFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) (MaskedFusedMultiplySubAddFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedFusedMultiplySubAddFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) (MaskedFusedMultiplySubAddFloat64x2 
(MaskedFusedMultiplySubAddFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM mask))
(MaskedFusedMultiplySubAddFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM mask))
-(MaskedGreaterFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [6] x y (VPMOVVec32x16ToM mask)))
(MaskedGreaterFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [6] x y (VPMOVVec32x4ToM mask)))
(MaskedGreaterFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [6] x y (VPMOVVec32x8ToM mask)))
+(MaskedGreaterFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [6] x y (VPMOVVec32x16ToM mask)))
(MaskedGreaterFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [6] x y (VPMOVVec64x2ToM mask)))
(MaskedGreaterFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [6] x y (VPMOVVec64x4ToM mask)))
(MaskedGreaterFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [6] x y (VPMOVVec64x8ToM mask)))
-(MaskedGreaterInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [6] x y (VPMOVVec16x16ToM mask)))
-(MaskedGreaterInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [6] x y (VPMOVVec16x32ToM mask)))
-(MaskedGreaterInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [6] x y (VPMOVVec16x8ToM mask)))
-(MaskedGreaterInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [6] x y (VPMOVVec32x16ToM mask)))
-(MaskedGreaterInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [6] x y (VPMOVVec32x4ToM mask)))
-(MaskedGreaterInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [6] x y (VPMOVVec32x8ToM mask)))
-(MaskedGreaterInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [6] x y (VPMOVVec64x2ToM mask)))
-(MaskedGreaterInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [6] x y (VPMOVVec64x4ToM mask)))
-(MaskedGreaterInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [6] x y (VPMOVVec64x8ToM mask)))
(MaskedGreaterInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [6] x y (VPMOVVec8x16ToM mask)))
(MaskedGreaterInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [6] x y (VPMOVVec8x32ToM mask)))
(MaskedGreaterInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [6] x y (VPMOVVec8x64ToM mask)))
-(MaskedGreaterUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [6] x y (VPMOVVec16x16ToM mask)))
-(MaskedGreaterUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [6] x y (VPMOVVec16x32ToM mask)))
-(MaskedGreaterUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [6] x y (VPMOVVec16x8ToM mask)))
-(MaskedGreaterUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [6] x y (VPMOVVec32x16ToM mask)))
-(MaskedGreaterUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [6] x y (VPMOVVec32x4ToM mask)))
-(MaskedGreaterUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [6] x y (VPMOVVec32x8ToM mask)))
-(MaskedGreaterUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [6] x y (VPMOVVec64x2ToM mask)))
-(MaskedGreaterUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [6] x y (VPMOVVec64x4ToM mask)))
-(MaskedGreaterUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [6] x y (VPMOVVec64x8ToM mask)))
+(MaskedGreaterInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [6] x y (VPMOVVec16x8ToM mask)))
+(MaskedGreaterInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [6] x y (VPMOVVec16x16ToM mask)))
+(MaskedGreaterInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [6] x y (VPMOVVec16x32ToM mask)))
+(MaskedGreaterInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [6] x y (VPMOVVec32x4ToM mask)))
+(MaskedGreaterInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [6] x y (VPMOVVec32x8ToM mask)))
+(MaskedGreaterInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [6] x y (VPMOVVec32x16ToM mask)))
+(MaskedGreaterInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [6] x y (VPMOVVec64x2ToM mask)))
+(MaskedGreaterInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [6] x y (VPMOVVec64x4ToM mask)))
+(MaskedGreaterInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [6] x y (VPMOVVec64x8ToM mask)))
(MaskedGreaterUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [6] x y (VPMOVVec8x16ToM mask)))
(MaskedGreaterUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [6] x y (VPMOVVec8x32ToM mask)))
(MaskedGreaterUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [6] x y (VPMOVVec8x64ToM mask)))
-(MaskedGreaterEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [5] x y (VPMOVVec32x16ToM mask)))
+(MaskedGreaterUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [6] x y (VPMOVVec16x8ToM mask)))
+(MaskedGreaterUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [6] x y (VPMOVVec16x16ToM mask)))
+(MaskedGreaterUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [6] x y (VPMOVVec16x32ToM mask)))
+(MaskedGreaterUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [6] x y (VPMOVVec32x4ToM mask)))
+(MaskedGreaterUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [6] x y (VPMOVVec32x8ToM mask)))
+(MaskedGreaterUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [6] x y (VPMOVVec32x16ToM mask)))
+(MaskedGreaterUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [6] x y (VPMOVVec64x2ToM mask)))
+(MaskedGreaterUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [6] x y (VPMOVVec64x4ToM mask)))
+(MaskedGreaterUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [6] x y (VPMOVVec64x8ToM mask)))
(MaskedGreaterEqualFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [5] x y (VPMOVVec32x4ToM mask)))
(MaskedGreaterEqualFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [5] x y (VPMOVVec32x8ToM mask)))
+(MaskedGreaterEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [5] x y (VPMOVVec32x16ToM mask)))
(MaskedGreaterEqualFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [5] x y (VPMOVVec64x2ToM mask)))
(MaskedGreaterEqualFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [5] x y (VPMOVVec64x4ToM mask)))
(MaskedGreaterEqualFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [5] x y (VPMOVVec64x8ToM mask)))
-(MaskedGreaterEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [5] x y (VPMOVVec16x16ToM mask)))
-(MaskedGreaterEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [5] x y (VPMOVVec16x32ToM mask)))
-(MaskedGreaterEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [5] x y (VPMOVVec16x8ToM mask)))
-(MaskedGreaterEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [5] x y (VPMOVVec32x16ToM mask)))
-(MaskedGreaterEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [5] x y (VPMOVVec32x4ToM mask)))
-(MaskedGreaterEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [5] x y (VPMOVVec32x8ToM mask)))
-(MaskedGreaterEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [5] x y (VPMOVVec64x2ToM mask)))
-(MaskedGreaterEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [5] x y (VPMOVVec64x4ToM mask)))
-(MaskedGreaterEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [5] x y (VPMOVVec64x8ToM mask)))
(MaskedGreaterEqualInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [5] x y (VPMOVVec8x16ToM mask)))
(MaskedGreaterEqualInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [5] x y (VPMOVVec8x32ToM mask)))
(MaskedGreaterEqualInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [5] x y (VPMOVVec8x64ToM mask)))
-(MaskedGreaterEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [5] x y (VPMOVVec16x16ToM mask)))
-(MaskedGreaterEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [5] x y (VPMOVVec16x32ToM mask)))
-(MaskedGreaterEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [5] x y (VPMOVVec16x8ToM mask)))
-(MaskedGreaterEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [5] x y (VPMOVVec32x16ToM mask)))
-(MaskedGreaterEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [5] x y (VPMOVVec32x4ToM mask)))
-(MaskedGreaterEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [5] x y (VPMOVVec32x8ToM mask)))
-(MaskedGreaterEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [5] x y (VPMOVVec64x2ToM mask)))
-(MaskedGreaterEqualUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [5] x y (VPMOVVec64x4ToM mask)))
-(MaskedGreaterEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [5] x y (VPMOVVec64x8ToM mask)))
+(MaskedGreaterEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [5] x y (VPMOVVec16x8ToM mask)))
+(MaskedGreaterEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [5] x y (VPMOVVec16x16ToM mask)))
+(MaskedGreaterEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [5] x y (VPMOVVec16x32ToM mask)))
+(MaskedGreaterEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [5] x y (VPMOVVec32x4ToM mask)))
+(MaskedGreaterEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [5] x y (VPMOVVec32x8ToM mask)))
+(MaskedGreaterEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [5] x y (VPMOVVec32x16ToM mask)))
+(MaskedGreaterEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [5] x y (VPMOVVec64x2ToM mask)))
+(MaskedGreaterEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [5] x y (VPMOVVec64x4ToM mask)))
+(MaskedGreaterEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [5] x y (VPMOVVec64x8ToM mask)))
(MaskedGreaterEqualUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [5] x y (VPMOVVec8x16ToM mask)))
(MaskedGreaterEqualUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [5] x y (VPMOVVec8x32ToM mask)))
(MaskedGreaterEqualUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [5] x y (VPMOVVec8x64ToM mask)))
-(MaskedIsNanFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM mask)))
+(MaskedGreaterEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [5] x y (VPMOVVec16x8ToM mask)))
+(MaskedGreaterEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [5] x y (VPMOVVec16x16ToM mask)))
+(MaskedGreaterEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [5] x y (VPMOVVec16x32ToM mask)))
+(MaskedGreaterEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [5] x y (VPMOVVec32x4ToM mask)))
+(MaskedGreaterEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [5] x y (VPMOVVec32x8ToM mask)))
+(MaskedGreaterEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [5] x y (VPMOVVec32x16ToM mask)))
+(MaskedGreaterEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [5] x y (VPMOVVec64x2ToM mask)))
+(MaskedGreaterEqualUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [5] x y (VPMOVVec64x4ToM mask)))
+(MaskedGreaterEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [5] x y (VPMOVVec64x8ToM mask)))
(MaskedIsNanFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM mask)))
(MaskedIsNanFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM mask)))
+(MaskedIsNanFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM mask)))
(MaskedIsNanFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM mask)))
(MaskedIsNanFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM mask)))
(MaskedIsNanFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM mask)))
-(MaskedLessFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM mask)))
(MaskedLessFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM mask)))
(MaskedLessFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM mask)))
+(MaskedLessFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM mask)))
(MaskedLessFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM mask)))
(MaskedLessFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM mask)))
(MaskedLessFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM mask)))
-(MaskedLessInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM mask)))
-(MaskedLessInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM mask)))
-(MaskedLessInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM mask)))
-(MaskedLessInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM mask)))
-(MaskedLessInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM mask)))
-(MaskedLessInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM mask)))
-(MaskedLessInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM mask)))
-(MaskedLessInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM mask)))
-(MaskedLessInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM mask)))
(MaskedLessInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM mask)))
(MaskedLessInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM mask)))
(MaskedLessInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM mask)))
-(MaskedLessUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM mask)))
-(MaskedLessUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM mask)))
-(MaskedLessUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM mask)))
-(MaskedLessUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM mask)))
-(MaskedLessUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM mask)))
-(MaskedLessUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM mask)))
-(MaskedLessUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM mask)))
-(MaskedLessUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM mask)))
-(MaskedLessUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM mask)))
+(MaskedLessInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM mask)))
+(MaskedLessInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM mask)))
+(MaskedLessInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM mask)))
+(MaskedLessInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM mask)))
+(MaskedLessInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM mask)))
+(MaskedLessInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM mask)))
+(MaskedLessInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM mask)))
+(MaskedLessInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM mask)))
+(MaskedLessInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM mask)))
(MaskedLessUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM mask)))
(MaskedLessUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM mask)))
(MaskedLessUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM mask)))
-(MaskedLessEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM mask)))
+(MaskedLessUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM mask)))
+(MaskedLessUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM mask)))
+(MaskedLessUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM mask)))
+(MaskedLessUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM mask)))
+(MaskedLessUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM mask)))
+(MaskedLessUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM mask)))
+(MaskedLessUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM mask)))
+(MaskedLessUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM mask)))
+(MaskedLessUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM mask)))
(MaskedLessEqualFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask)))
(MaskedLessEqualFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask)))
+(MaskedLessEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM mask)))
(MaskedLessEqualFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM mask)))
(MaskedLessEqualFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM mask)))
(MaskedLessEqualFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM mask)))
-(MaskedLessEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM mask)))
-(MaskedLessEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM mask)))
-(MaskedLessEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM mask)))
-(MaskedLessEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM mask)))
-(MaskedLessEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM mask)))
-(MaskedLessEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM mask)))
-(MaskedLessEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM mask)))
-(MaskedLessEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM mask)))
-(MaskedLessEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM mask)))
(MaskedLessEqualInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM mask)))
(MaskedLessEqualInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM mask)))
(MaskedLessEqualInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM mask)))
-(MaskedLessEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM mask)))
-(MaskedLessEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM mask)))
-(MaskedLessEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM mask)))
-(MaskedLessEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM mask)))
-(MaskedLessEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM mask)))
-(MaskedLessEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM mask)))
-(MaskedLessEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM mask)))
-(MaskedLessEqualUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM mask)))
-(MaskedLessEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM mask)))
+(MaskedLessEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM mask)))
+(MaskedLessEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM mask)))
+(MaskedLessEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM mask)))
+(MaskedLessEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM mask)))
+(MaskedLessEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM mask)))
+(MaskedLessEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM mask)))
+(MaskedLessEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM mask)))
+(MaskedLessEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM mask)))
+(MaskedLessEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM mask)))
(MaskedLessEqualUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM mask)))
(MaskedLessEqualUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM mask)))
(MaskedLessEqualUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM mask)))
-(MaskedMaxFloat32x16 x y mask) => (VMAXPSMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedLessEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM mask)))
+(MaskedLessEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM mask)))
+(MaskedLessEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM mask)))
+(MaskedLessEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM mask)))
+(MaskedLessEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM mask)))
+(MaskedLessEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM mask)))
+(MaskedLessEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM mask)))
+(MaskedLessEqualUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM mask)))
+(MaskedLessEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM mask)))
(MaskedMaxFloat32x4 x y mask) => (VMAXPSMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedMaxFloat32x8 x y mask) => (VMAXPSMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedMaxFloat32x16 x y mask) => (VMAXPSMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedMaxFloat64x2 x y mask) => (VMAXPDMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedMaxFloat64x4 x y mask) => (VMAXPDMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedMaxFloat64x8 x y mask) => (VMAXPDMasked512 x y (VPMOVVec64x8ToM mask))
-(MaskedMaxInt16x16 x y mask) => (VPMAXSWMasked256 x y (VPMOVVec16x16ToM mask))
-(MaskedMaxInt16x32 x y mask) => (VPMAXSWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedMaxInt16x8 x y mask) => (VPMAXSWMasked128 x y (VPMOVVec16x8ToM mask))
-(MaskedMaxInt32x16 x y mask) => (VPMAXSDMasked512 x y (VPMOVVec32x16ToM mask))
-(MaskedMaxInt32x4 x y mask) => (VPMAXSDMasked128 x y (VPMOVVec32x4ToM mask))
-(MaskedMaxInt32x8 x y mask) => (VPMAXSDMasked256 x y (VPMOVVec32x8ToM mask))
-(MaskedMaxInt64x2 x y mask) => (VPMAXSQMasked128 x y (VPMOVVec64x2ToM mask))
-(MaskedMaxInt64x4 x y mask) => (VPMAXSQMasked256 x y (VPMOVVec64x4ToM mask))
-(MaskedMaxInt64x8 x y mask) => (VPMAXSQMasked512 x y (VPMOVVec64x8ToM mask))
(MaskedMaxInt8x16 x y mask) => (VPMAXSBMasked128 x y (VPMOVVec8x16ToM mask))
(MaskedMaxInt8x32 x y mask) => (VPMAXSBMasked256 x y (VPMOVVec8x32ToM mask))
(MaskedMaxInt8x64 x y mask) => (VPMAXSBMasked512 x y (VPMOVVec8x64ToM mask))
-(MaskedMaxUint16x16 x y mask) => (VPMAXUWMasked256 x y (VPMOVVec16x16ToM mask))
-(MaskedMaxUint16x32 x y mask) => (VPMAXUWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedMaxUint16x8 x y mask) => (VPMAXUWMasked128 x y (VPMOVVec16x8ToM mask))
-(MaskedMaxUint32x16 x y mask) => (VPMAXUDMasked512 x y (VPMOVVec32x16ToM mask))
-(MaskedMaxUint32x4 x y mask) => (VPMAXUDMasked128 x y (VPMOVVec32x4ToM mask))
-(MaskedMaxUint32x8 x y mask) => (VPMAXUDMasked256 x y (VPMOVVec32x8ToM mask))
-(MaskedMaxUint64x2 x y mask) => (VPMAXUQMasked128 x y (VPMOVVec64x2ToM mask))
-(MaskedMaxUint64x4 x y mask) => (VPMAXUQMasked256 x y (VPMOVVec64x4ToM mask))
-(MaskedMaxUint64x8 x y mask) => (VPMAXUQMasked512 x y (VPMOVVec64x8ToM mask))
+(MaskedMaxInt16x8 x y mask) => (VPMAXSWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedMaxInt16x16 x y mask) => (VPMAXSWMasked256 x y (VPMOVVec16x16ToM mask))
+(MaskedMaxInt16x32 x y mask) => (VPMAXSWMasked512 x y (VPMOVVec16x32ToM mask))
+(MaskedMaxInt32x4 x y mask) => (VPMAXSDMasked128 x y (VPMOVVec32x4ToM mask))
+(MaskedMaxInt32x8 x y mask) => (VPMAXSDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedMaxInt32x16 x y mask) => (VPMAXSDMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedMaxInt64x2 x y mask) => (VPMAXSQMasked128 x y (VPMOVVec64x2ToM mask))
+(MaskedMaxInt64x4 x y mask) => (VPMAXSQMasked256 x y (VPMOVVec64x4ToM mask))
+(MaskedMaxInt64x8 x y mask) => (VPMAXSQMasked512 x y (VPMOVVec64x8ToM mask))
(MaskedMaxUint8x16 x y mask) => (VPMAXUBMasked128 x y (VPMOVVec8x16ToM mask))
(MaskedMaxUint8x32 x y mask) => (VPMAXUBMasked256 x y (VPMOVVec8x32ToM mask))
(MaskedMaxUint8x64 x y mask) => (VPMAXUBMasked512 x y (VPMOVVec8x64ToM mask))
-(MaskedMinFloat32x16 x y mask) => (VMINPSMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedMaxUint16x8 x y mask) => (VPMAXUWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedMaxUint16x16 x y mask) => (VPMAXUWMasked256 x y (VPMOVVec16x16ToM mask))
+(MaskedMaxUint16x32 x y mask) => (VPMAXUWMasked512 x y (VPMOVVec16x32ToM mask))
+(MaskedMaxUint32x4 x y mask) => (VPMAXUDMasked128 x y (VPMOVVec32x4ToM mask))
+(MaskedMaxUint32x8 x y mask) => (VPMAXUDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedMaxUint32x16 x y mask) => (VPMAXUDMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedMaxUint64x2 x y mask) => (VPMAXUQMasked128 x y (VPMOVVec64x2ToM mask))
+(MaskedMaxUint64x4 x y mask) => (VPMAXUQMasked256 x y (VPMOVVec64x4ToM mask))
+(MaskedMaxUint64x8 x y mask) => (VPMAXUQMasked512 x y (VPMOVVec64x8ToM mask))
(MaskedMinFloat32x4 x y mask) => (VMINPSMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedMinFloat32x8 x y mask) => (VMINPSMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedMinFloat32x16 x y mask) => (VMINPSMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedMinFloat64x2 x y mask) => (VMINPDMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedMinFloat64x4 x y mask) => (VMINPDMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedMinFloat64x8 x y mask) => (VMINPDMasked512 x y (VPMOVVec64x8ToM mask))
-(MaskedMinInt16x16 x y mask) => (VPMINSWMasked256 x y (VPMOVVec16x16ToM mask))
-(MaskedMinInt16x32 x y mask) => (VPMINSWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedMinInt16x8 x y mask) => (VPMINSWMasked128 x y (VPMOVVec16x8ToM mask))
-(MaskedMinInt32x16 x y mask) => (VPMINSDMasked512 x y (VPMOVVec32x16ToM mask))
-(MaskedMinInt32x4 x y mask) => (VPMINSDMasked128 x y (VPMOVVec32x4ToM mask))
-(MaskedMinInt32x8 x y mask) => (VPMINSDMasked256 x y (VPMOVVec32x8ToM mask))
-(MaskedMinInt64x2 x y mask) => (VPMINSQMasked128 x y (VPMOVVec64x2ToM mask))
-(MaskedMinInt64x4 x y mask) => (VPMINSQMasked256 x y (VPMOVVec64x4ToM mask))
-(MaskedMinInt64x8 x y mask) => (VPMINSQMasked512 x y (VPMOVVec64x8ToM mask))
(MaskedMinInt8x16 x y mask) => (VPMINSBMasked128 x y (VPMOVVec8x16ToM mask))
(MaskedMinInt8x32 x y mask) => (VPMINSBMasked256 x y (VPMOVVec8x32ToM mask))
(MaskedMinInt8x64 x y mask) => (VPMINSBMasked512 x y (VPMOVVec8x64ToM mask))
-(MaskedMinUint16x16 x y mask) => (VPMINUWMasked256 x y (VPMOVVec16x16ToM mask))
-(MaskedMinUint16x32 x y mask) => (VPMINUWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedMinUint16x8 x y mask) => (VPMINUWMasked128 x y (VPMOVVec16x8ToM mask))
-(MaskedMinUint32x16 x y mask) => (VPMINUDMasked512 x y (VPMOVVec32x16ToM mask))
-(MaskedMinUint32x4 x y mask) => (VPMINUDMasked128 x y (VPMOVVec32x4ToM mask))
-(MaskedMinUint32x8 x y mask) => (VPMINUDMasked256 x y (VPMOVVec32x8ToM mask))
-(MaskedMinUint64x2 x y mask) => (VPMINUQMasked128 x y (VPMOVVec64x2ToM mask))
-(MaskedMinUint64x4 x y mask) => (VPMINUQMasked256 x y (VPMOVVec64x4ToM mask))
-(MaskedMinUint64x8 x y mask) => (VPMINUQMasked512 x y (VPMOVVec64x8ToM mask))
+(MaskedMinInt16x8 x y mask) => (VPMINSWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedMinInt16x16 x y mask) => (VPMINSWMasked256 x y (VPMOVVec16x16ToM mask))
+(MaskedMinInt16x32 x y mask) => (VPMINSWMasked512 x y (VPMOVVec16x32ToM mask))
+(MaskedMinInt32x4 x y mask) => (VPMINSDMasked128 x y (VPMOVVec32x4ToM mask))
+(MaskedMinInt32x8 x y mask) => (VPMINSDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedMinInt32x16 x y mask) => (VPMINSDMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedMinInt64x2 x y mask) => (VPMINSQMasked128 x y (VPMOVVec64x2ToM mask))
+(MaskedMinInt64x4 x y mask) => (VPMINSQMasked256 x y (VPMOVVec64x4ToM mask))
+(MaskedMinInt64x8 x y mask) => (VPMINSQMasked512 x y (VPMOVVec64x8ToM mask))
(MaskedMinUint8x16 x y mask) => (VPMINUBMasked128 x y (VPMOVVec8x16ToM mask))
(MaskedMinUint8x32 x y mask) => (VPMINUBMasked256 x y (VPMOVVec8x32ToM mask))
(MaskedMinUint8x64 x y mask) => (VPMINUBMasked512 x y (VPMOVVec8x64ToM mask))
-(MaskedMulFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedMinUint16x8 x y mask) => (VPMINUWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedMinUint16x16 x y mask) => (VPMINUWMasked256 x y (VPMOVVec16x16ToM mask))
+(MaskedMinUint16x32 x y mask) => (VPMINUWMasked512 x y (VPMOVVec16x32ToM mask))
+(MaskedMinUint32x4 x y mask) => (VPMINUDMasked128 x y (VPMOVVec32x4ToM mask))
+(MaskedMinUint32x8 x y mask) => (VPMINUDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedMinUint32x16 x y mask) => (VPMINUDMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedMinUint64x2 x y mask) => (VPMINUQMasked128 x y (VPMOVVec64x2ToM mask))
+(MaskedMinUint64x4 x y mask) => (VPMINUQMasked256 x y (VPMOVVec64x4ToM mask))
+(MaskedMinUint64x8 x y mask) => (VPMINUQMasked512 x y (VPMOVVec64x8ToM mask))
(MaskedMulFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedMulFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedMulFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedMulFloat64x2 x y mask) => (VMULPDMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedMulFloat64x4 x y mask) => (VMULPDMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedMulFloat64x8 x y mask) => (VMULPDMasked512 x y (VPMOVVec64x8ToM mask))
-(MaskedMulByPowOf2Float32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedMulByPowOf2Float32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedMulByPowOf2Float32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedMulByPowOf2Float32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedMulByPowOf2Float64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedMulByPowOf2Float64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedMulByPowOf2Float64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM mask))
@@ -811,288 +811,288 @@
(MaskedMulEvenWidenUint64x2 x y mask) => (VPMULUDQMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedMulEvenWidenUint64x4 x y mask) => (VPMULUDQMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedMulEvenWidenUint64x8 x y mask) => (VPMULUDQMasked512 x y (VPMOVVec64x8ToM mask))
+(MaskedMulHighInt16x8 x y mask) => (VPMULHWMasked128 x y (VPMOVVec16x8ToM mask))
(MaskedMulHighInt16x16 x y mask) => (VPMULHWMasked256 x y (VPMOVVec16x16ToM mask))
(MaskedMulHighInt16x32 x y mask) => (VPMULHWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedMulHighInt16x8 x y mask) => (VPMULHWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedMulHighUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM mask))
(MaskedMulHighUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM mask))
(MaskedMulHighUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedMulHighUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedMulLowInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask))
(MaskedMulLowInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask))
(MaskedMulLowInt16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedMulLowInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask))
-(MaskedMulLowInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedMulLowInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedMulLowInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedMulLowInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedMulLowInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedMulLowInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedMulLowInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask))
-(MaskedNotEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM mask)))
(MaskedNotEqualFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask)))
(MaskedNotEqualFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask)))
+(MaskedNotEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM mask)))
(MaskedNotEqualFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM mask)))
(MaskedNotEqualFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM mask)))
(MaskedNotEqualFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM mask)))
-(MaskedNotEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM mask)))
-(MaskedNotEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM mask)))
-(MaskedNotEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM mask)))
-(MaskedNotEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM mask)))
-(MaskedNotEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM mask)))
-(MaskedNotEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM mask)))
-(MaskedNotEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM mask)))
-(MaskedNotEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM mask)))
-(MaskedNotEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM mask)))
(MaskedNotEqualInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM mask)))
(MaskedNotEqualInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM mask)))
(MaskedNotEqualInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM mask)))
-(MaskedNotEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM mask)))
-(MaskedNotEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM mask)))
-(MaskedNotEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM mask)))
-(MaskedNotEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM mask)))
-(MaskedNotEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM mask)))
-(MaskedNotEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM mask)))
-(MaskedNotEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM mask)))
-(MaskedNotEqualUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM mask)))
-(MaskedNotEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask)))
+(MaskedNotEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM mask)))
+(MaskedNotEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM mask)))
+(MaskedNotEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM mask)))
+(MaskedNotEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM mask)))
+(MaskedNotEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM mask)))
+(MaskedNotEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM mask)))
+(MaskedNotEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM mask)))
+(MaskedNotEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM mask)))
+(MaskedNotEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM mask)))
(MaskedNotEqualUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM mask)))
(MaskedNotEqualUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM mask)))
(MaskedNotEqualUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM mask)))
-(MaskedOrFloat32x16 x y mask) => (VORPSMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedNotEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM mask)))
+(MaskedNotEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM mask)))
+(MaskedNotEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM mask)))
+(MaskedNotEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM mask)))
+(MaskedNotEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM mask)))
+(MaskedNotEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM mask)))
+(MaskedNotEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM mask)))
+(MaskedNotEqualUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM mask)))
+(MaskedNotEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask)))
(MaskedOrFloat32x4 x y mask) => (VORPSMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedOrFloat32x8 x y mask) => (VORPSMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedOrFloat32x16 x y mask) => (VORPSMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedOrFloat64x2 x y mask) => (VORPDMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedOrFloat64x4 x y mask) => (VORPDMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedOrFloat64x8 x y mask) => (VORPDMasked512 x y (VPMOVVec64x8ToM mask))
-(MaskedOrInt32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedOrInt32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedOrInt32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedOrInt32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedOrInt64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedOrInt64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedOrInt64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM mask))
-(MaskedOrUint32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedOrUint32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedOrUint32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedOrUint32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedOrUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedOrUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedOrUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM mask))
+(MaskedPairDotProdInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM mask))
(MaskedPairDotProdInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM mask))
(MaskedPairDotProdInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedPairDotProdInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM mask))
-(MaskedPairDotProdAccumulateInt32x16 x y z mask) => (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM mask))
(MaskedPairDotProdAccumulateInt32x4 x y z mask) => (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM mask))
(MaskedPairDotProdAccumulateInt32x8 x y z mask) => (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM mask))
-(MaskedPopCountInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask))
-(MaskedPopCountInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask))
-(MaskedPopCountInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask))
-(MaskedPopCountInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask))
-(MaskedPopCountInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask))
-(MaskedPopCountInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask))
-(MaskedPopCountInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask))
-(MaskedPopCountInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask))
-(MaskedPopCountInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask))
+(MaskedPairDotProdAccumulateInt32x16 x y z mask) => (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM mask))
(MaskedPopCountInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask))
(MaskedPopCountInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask))
(MaskedPopCountInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask))
-(MaskedPopCountUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask))
-(MaskedPopCountUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask))
-(MaskedPopCountUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask))
-(MaskedPopCountUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask))
-(MaskedPopCountUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask))
-(MaskedPopCountUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask))
-(MaskedPopCountUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask))
-(MaskedPopCountUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask))
-(MaskedPopCountUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask))
+(MaskedPopCountInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask))
+(MaskedPopCountInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask))
+(MaskedPopCountInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask))
+(MaskedPopCountInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask))
+(MaskedPopCountInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask))
+(MaskedPopCountInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask))
+(MaskedPopCountInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask))
+(MaskedPopCountInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask))
+(MaskedPopCountInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask))
(MaskedPopCountUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask))
(MaskedPopCountUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask))
(MaskedPopCountUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask))
-(MaskedRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+8] x (VPMOVVec32x16ToM mask))
+(MaskedPopCountUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask))
+(MaskedPopCountUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask))
+(MaskedPopCountUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask))
+(MaskedPopCountUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask))
+(MaskedPopCountUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask))
+(MaskedPopCountUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask))
+(MaskedPopCountUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask))
+(MaskedPopCountUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask))
+(MaskedPopCountUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask))
(MaskedRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+8] x (VPMOVVec32x4ToM mask))
(MaskedRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+8] x (VPMOVVec32x8ToM mask))
+(MaskedRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+8] x (VPMOVVec32x16ToM mask))
(MaskedRoundSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+8] x (VPMOVVec64x2ToM mask))
(MaskedRoundSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+8] x (VPMOVVec64x4ToM mask))
(MaskedRoundSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+8] x (VPMOVVec64x8ToM mask))
-(MaskedRoundWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask))
(MaskedRoundWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask))
(MaskedRoundWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask))
+(MaskedRoundWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask))
(MaskedRoundWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask))
(MaskedRoundWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask))
(MaskedRoundWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask))
-(MaskedSaturatedAddInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask))
-(MaskedSaturatedAddInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedSaturatedAddInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask))
(MaskedSaturatedAddInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask))
(MaskedSaturatedAddInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask))
(MaskedSaturatedAddInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask))
-(MaskedSaturatedAddUint16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask))
-(MaskedSaturatedAddUint16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedSaturatedAddUint16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedSaturatedAddInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedSaturatedAddInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask))
+(MaskedSaturatedAddInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask))
(MaskedSaturatedAddUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask))
(MaskedSaturatedAddUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask))
(MaskedSaturatedAddUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask))
-(MaskedSaturatedPairDotProdAccumulateInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM mask))
+(MaskedSaturatedAddUint16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedSaturatedAddUint16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask))
+(MaskedSaturatedAddUint16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask))
(MaskedSaturatedPairDotProdAccumulateInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM mask))
(MaskedSaturatedPairDotProdAccumulateInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM mask))
-(MaskedSaturatedSubInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask))
-(MaskedSaturatedSubInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedSaturatedSubInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedSaturatedPairDotProdAccumulateInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM mask))
(MaskedSaturatedSubInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask))
(MaskedSaturatedSubInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask))
(MaskedSaturatedSubInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask))
-(MaskedSaturatedSubUint16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask))
-(MaskedSaturatedSubUint16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedSaturatedSubUint16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedSaturatedSubInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedSaturatedSubInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask))
+(MaskedSaturatedSubInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask))
(MaskedSaturatedSubUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask))
(MaskedSaturatedSubUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask))
(MaskedSaturatedSubUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask))
+(MaskedSaturatedSubUint16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedSaturatedSubUint16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask))
+(MaskedSaturatedSubUint16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask))
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask))
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask))
-(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask))
+(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask))
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask))
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask))
-(MaskedSqrtFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask))
+(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask))
(MaskedSqrtFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask))
(MaskedSqrtFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask))
+(MaskedSqrtFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask))
(MaskedSqrtFloat64x2 x mask) => (VSQRTPDMasked128 x (VPMOVVec64x2ToM mask))
(MaskedSqrtFloat64x4 x mask) => (VSQRTPDMasked256 x (VPMOVVec64x4ToM mask))
(MaskedSqrtFloat64x8 x mask) => (VSQRTPDMasked512 x (VPMOVVec64x8ToM mask))
-(MaskedSubFloat32x16 x y mask) => (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedSubFloat32x4 x y mask) => (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedSubFloat32x8 x y mask) => (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedSubFloat32x16 x y mask) => (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedSubFloat64x2 x y mask) => (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedSubFloat64x4 x y mask) => (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedSubFloat64x8 x y mask) => (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask))
-(MaskedSubInt16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask))
-(MaskedSubInt16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedSubInt16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask))
-(MaskedSubInt32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask))
-(MaskedSubInt32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask))
-(MaskedSubInt32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask))
-(MaskedSubInt64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask))
-(MaskedSubInt64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask))
-(MaskedSubInt64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask))
(MaskedSubInt8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask))
(MaskedSubInt8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask))
(MaskedSubInt8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask))
-(MaskedSubUint16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask))
-(MaskedSubUint16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask))
-(MaskedSubUint16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask))
-(MaskedSubUint32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask))
-(MaskedSubUint32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask))
-(MaskedSubUint32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask))
-(MaskedSubUint64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask))
-(MaskedSubUint64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask))
-(MaskedSubUint64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask))
+(MaskedSubInt16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedSubInt16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask))
+(MaskedSubInt16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask))
+(MaskedSubInt32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask))
+(MaskedSubInt32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedSubInt32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedSubInt64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask))
+(MaskedSubInt64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask))
+(MaskedSubInt64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask))
(MaskedSubUint8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask))
(MaskedSubUint8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask))
(MaskedSubUint8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask))
-(MaskedTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+11] x (VPMOVVec32x16ToM mask))
+(MaskedSubUint16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask))
+(MaskedSubUint16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask))
+(MaskedSubUint16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask))
+(MaskedSubUint32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask))
+(MaskedSubUint32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedSubUint32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedSubUint64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask))
+(MaskedSubUint64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask))
+(MaskedSubUint64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask))
(MaskedTruncSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+11] x (VPMOVVec32x4ToM mask))
(MaskedTruncSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+11] x (VPMOVVec32x8ToM mask))
+(MaskedTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+11] x (VPMOVVec32x16ToM mask))
(MaskedTruncSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+11] x (VPMOVVec64x2ToM mask))
(MaskedTruncSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+11] x (VPMOVVec64x4ToM mask))
(MaskedTruncSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+11] x (VPMOVVec64x8ToM mask))
-(MaskedTruncWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask))
(MaskedTruncWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask))
(MaskedTruncWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask))
+(MaskedTruncWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask))
(MaskedTruncWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask))
(MaskedTruncWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask))
(MaskedTruncWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask))
-(MaskedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask))
(MaskedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask))
(MaskedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask))
-(MaskedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask))
+(MaskedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask))
(MaskedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask))
(MaskedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask))
-(MaskedXorFloat32x16 x y mask) => (VXORPSMasked512 x y (VPMOVVec32x16ToM mask))
+(MaskedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask))
(MaskedXorFloat32x4 x y mask) => (VXORPSMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedXorFloat32x8 x y mask) => (VXORPSMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedXorFloat32x16 x y mask) => (VXORPSMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedXorFloat64x2 x y mask) => (VXORPDMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedXorFloat64x4 x y mask) => (VXORPDMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedXorFloat64x8 x y mask) => (VXORPDMasked512 x y (VPMOVVec64x8ToM mask))
-(MaskedXorInt32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedXorInt32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedXorInt32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedXorInt32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedXorInt64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedXorInt64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedXorInt64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM mask))
-(MaskedXorUint32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedXorUint32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM mask))
(MaskedXorUint32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM mask))
+(MaskedXorUint32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask))
(MaskedXorUint64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM mask))
(MaskedXorUint64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM mask))
(MaskedXorUint64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM mask))
-(MaxFloat32x16 ...) => (VMAXPS512 ...)
(MaxFloat32x4 ...) => (VMAXPS128 ...)
(MaxFloat32x8 ...) => (VMAXPS256 ...)
+(MaxFloat32x16 ...) => (VMAXPS512 ...)
(MaxFloat64x2 ...) => (VMAXPD128 ...)
(MaxFloat64x4 ...) => (VMAXPD256 ...)
(MaxFloat64x8 ...) => (VMAXPD512 ...)
-(MaxInt16x16 ...) => (VPMAXSW256 ...)
-(MaxInt16x32 ...) => (VPMAXSW512 ...)
-(MaxInt16x8 ...) => (VPMAXSW128 ...)
-(MaxInt32x16 ...) => (VPMAXSD512 ...)
-(MaxInt32x4 ...) => (VPMAXSD128 ...)
-(MaxInt32x8 ...) => (VPMAXSD256 ...)
-(MaxInt64x2 ...) => (VPMAXSQ128 ...)
-(MaxInt64x4 ...) => (VPMAXSQ256 ...)
-(MaxInt64x8 ...) => (VPMAXSQ512 ...)
(MaxInt8x16 ...) => (VPMAXSB128 ...)
(MaxInt8x32 ...) => (VPMAXSB256 ...)
(MaxInt8x64 ...) => (VPMAXSB512 ...)
-(MaxUint16x16 ...) => (VPMAXUW256 ...)
-(MaxUint16x32 ...) => (VPMAXUW512 ...)
-(MaxUint16x8 ...) => (VPMAXUW128 ...)
-(MaxUint32x16 ...) => (VPMAXUD512 ...)
-(MaxUint32x4 ...) => (VPMAXUD128 ...)
-(MaxUint32x8 ...) => (VPMAXUD256 ...)
-(MaxUint64x2 ...) => (VPMAXUQ128 ...)
-(MaxUint64x4 ...) => (VPMAXUQ256 ...)
-(MaxUint64x8 ...) => (VPMAXUQ512 ...)
+(MaxInt16x8 ...) => (VPMAXSW128 ...)
+(MaxInt16x16 ...) => (VPMAXSW256 ...)
+(MaxInt16x32 ...) => (VPMAXSW512 ...)
+(MaxInt32x4 ...) => (VPMAXSD128 ...)
+(MaxInt32x8 ...) => (VPMAXSD256 ...)
+(MaxInt32x16 ...) => (VPMAXSD512 ...)
+(MaxInt64x2 ...) => (VPMAXSQ128 ...)
+(MaxInt64x4 ...) => (VPMAXSQ256 ...)
+(MaxInt64x8 ...) => (VPMAXSQ512 ...)
(MaxUint8x16 ...) => (VPMAXUB128 ...)
(MaxUint8x32 ...) => (VPMAXUB256 ...)
(MaxUint8x64 ...) => (VPMAXUB512 ...)
-(MinFloat32x16 ...) => (VMINPS512 ...)
+(MaxUint16x8 ...) => (VPMAXUW128 ...)
+(MaxUint16x16 ...) => (VPMAXUW256 ...)
+(MaxUint16x32 ...) => (VPMAXUW512 ...)
+(MaxUint32x4 ...) => (VPMAXUD128 ...)
+(MaxUint32x8 ...) => (VPMAXUD256 ...)
+(MaxUint32x16 ...) => (VPMAXUD512 ...)
+(MaxUint64x2 ...) => (VPMAXUQ128 ...) +(MaxUint64x4 ...) => (VPMAXUQ256 ...) +(MaxUint64x8 ...) => (VPMAXUQ512 ...) (MinFloat32x4 ...) => (VMINPS128 ...) (MinFloat32x8 ...) => (VMINPS256 ...) +(MinFloat32x16 ...) => (VMINPS512 ...) (MinFloat64x2 ...) => (VMINPD128 ...) (MinFloat64x4 ...) => (VMINPD256 ...) (MinFloat64x8 ...) => (VMINPD512 ...) -(MinInt16x16 ...) => (VPMINSW256 ...) -(MinInt16x32 ...) => (VPMINSW512 ...) -(MinInt16x8 ...) => (VPMINSW128 ...) -(MinInt32x16 ...) => (VPMINSD512 ...) -(MinInt32x4 ...) => (VPMINSD128 ...) -(MinInt32x8 ...) => (VPMINSD256 ...) -(MinInt64x2 ...) => (VPMINSQ128 ...) -(MinInt64x4 ...) => (VPMINSQ256 ...) -(MinInt64x8 ...) => (VPMINSQ512 ...) (MinInt8x16 ...) => (VPMINSB128 ...) (MinInt8x32 ...) => (VPMINSB256 ...) (MinInt8x64 ...) => (VPMINSB512 ...) -(MinUint16x16 ...) => (VPMINUW256 ...) -(MinUint16x32 ...) => (VPMINUW512 ...) -(MinUint16x8 ...) => (VPMINUW128 ...) -(MinUint32x16 ...) => (VPMINUD512 ...) -(MinUint32x4 ...) => (VPMINUD128 ...) -(MinUint32x8 ...) => (VPMINUD256 ...) -(MinUint64x2 ...) => (VPMINUQ128 ...) -(MinUint64x4 ...) => (VPMINUQ256 ...) -(MinUint64x8 ...) => (VPMINUQ512 ...) +(MinInt16x8 ...) => (VPMINSW128 ...) +(MinInt16x16 ...) => (VPMINSW256 ...) +(MinInt16x32 ...) => (VPMINSW512 ...) +(MinInt32x4 ...) => (VPMINSD128 ...) +(MinInt32x8 ...) => (VPMINSD256 ...) +(MinInt32x16 ...) => (VPMINSD512 ...) +(MinInt64x2 ...) => (VPMINSQ128 ...) +(MinInt64x4 ...) => (VPMINSQ256 ...) +(MinInt64x8 ...) => (VPMINSQ512 ...) (MinUint8x16 ...) => (VPMINUB128 ...) (MinUint8x32 ...) => (VPMINUB256 ...) (MinUint8x64 ...) => (VPMINUB512 ...) -(MulFloat32x16 ...) => (VMULPS512 ...) +(MinUint16x8 ...) => (VPMINUW128 ...) +(MinUint16x16 ...) => (VPMINUW256 ...) +(MinUint16x32 ...) => (VPMINUW512 ...) +(MinUint32x4 ...) => (VPMINUD128 ...) +(MinUint32x8 ...) => (VPMINUD256 ...) +(MinUint32x16 ...) => (VPMINUD512 ...) +(MinUint64x2 ...) => (VPMINUQ128 ...) +(MinUint64x4 ...) => (VPMINUQ256 ...) +(MinUint64x8 ...) => (VPMINUQ512 ...) (MulFloat32x4 ...) => (VMULPS128 ...) (MulFloat32x8 ...) => (VMULPS256 ...) +(MulFloat32x16 ...) => (VMULPS512 ...) (MulFloat64x2 ...) => (VMULPD128 ...) (MulFloat64x4 ...) => (VMULPD256 ...) (MulFloat64x8 ...) => (VMULPD512 ...) -(MulByPowOf2Float32x16 ...) => (VSCALEFPS512 ...) (MulByPowOf2Float32x4 ...) => (VSCALEFPS128 ...) (MulByPowOf2Float32x8 ...) => (VSCALEFPS256 ...) +(MulByPowOf2Float32x16 ...) => (VSCALEFPS512 ...) (MulByPowOf2Float64x2 ...) => (VSCALEFPD128 ...) (MulByPowOf2Float64x4 ...) => (VSCALEFPD256 ...) (MulByPowOf2Float64x8 ...) => (VSCALEFPD512 ...) @@ -1106,282 +1106,282 @@ (MulEvenWidenUint64x2 ...) => (VPMULUDQ128 ...) (MulEvenWidenUint64x4 ...) => (VPMULUDQ256 ...) (MulEvenWidenUint64x8 ...) => (VPMULUDQ512 ...) +(MulHighInt16x8 ...) => (VPMULHW128 ...) (MulHighInt16x16 ...) => (VPMULHW256 ...) (MulHighInt16x32 ...) => (VPMULHW512 ...) -(MulHighInt16x8 ...) => (VPMULHW128 ...) +(MulHighUint16x8 ...) => (VPMULHUW128 ...) (MulHighUint16x16 ...) => (VPMULHUW256 ...) (MulHighUint16x32 ...) => (VPMULHUW512 ...) -(MulHighUint16x8 ...) => (VPMULHUW128 ...) +(MulLowInt16x8 ...) => (VPMULLW128 ...) (MulLowInt16x16 ...) => (VPMULLW256 ...) (MulLowInt16x32 ...) => (VPMULLW512 ...) -(MulLowInt16x8 ...) => (VPMULLW128 ...) -(MulLowInt32x16 ...) => (VPMULLD512 ...) (MulLowInt32x4 ...) => (VPMULLD128 ...) (MulLowInt32x8 ...) => (VPMULLD256 ...) +(MulLowInt32x16 ...) => (VPMULLD512 ...) (MulLowInt64x2 ...) => (VPMULLQ128 ...) (MulLowInt64x4 ...) => (VPMULLQ256 ...) (MulLowInt64x8 ...) => (VPMULLQ512 ...) 
-(NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
 (NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y)
 (NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y)
+(NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
 (NotEqualFloat64x2 x y) => (VCMPPD128 [4] x y)
 (NotEqualFloat64x4 x y) => (VCMPPD256 [4] x y)
 (NotEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [4] x y))
-(NotEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [4] x y))
-(NotEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [4] x y))
-(NotEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [4] x y))
-(NotEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [4] x y))
-(NotEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [4] x y))
-(NotEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [4] x y))
-(NotEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [4] x y))
-(NotEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [4] x y))
-(NotEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [4] x y))
 (NotEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [4] x y))
 (NotEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [4] x y))
 (NotEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [4] x y))
-(NotEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [4] x y))
-(NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y))
-(NotEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [4] x y))
-(NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y))
-(NotEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [4] x y))
-(NotEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [4] x y))
-(NotEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y))
-(NotEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y))
-(NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y))
+(NotEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [4] x y))
+(NotEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [4] x y))
+(NotEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [4] x y))
+(NotEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [4] x y))
+(NotEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [4] x y))
+(NotEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [4] x y))
+(NotEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [4] x y))
+(NotEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [4] x y))
+(NotEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [4] x y))
 (NotEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [4] x y))
 (NotEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [4] x y))
 (NotEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [4] x y))
-(OrFloat32x16 ...) => (VORPS512 ...)
+(NotEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [4] x y))
+(NotEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [4] x y))
+(NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y))
+(NotEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [4] x y))
+(NotEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [4] x y))
+(NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y))
+(NotEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y))
+(NotEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y))
+(NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y))
 (OrFloat32x4 ...) => (VORPS128 ...)
 (OrFloat32x8 ...) => (VORPS256 ...)
+(OrFloat32x16 ...) => (VORPS512 ...)
 (OrFloat64x2 ...) => (VORPD128 ...)
 (OrFloat64x4 ...) => (VORPD256 ...)
 (OrFloat64x8 ...) => (VORPD512 ...)
-(OrInt16x16 ...) => (VPOR256 ...)
+(OrInt8x16 ...) => (VPOR128 ...)
+(OrInt8x32 ...) => (VPOR256 ...)
 (OrInt16x8 ...) => (VPOR128 ...)
-(OrInt32x16 ...) => (VPORD512 ...)
+(OrInt16x16 ...) => (VPOR256 ...)
 (OrInt32x4 ...) => (VPOR128 ...)
 (OrInt32x8 ...) => (VPOR256 ...)
+(OrInt32x16 ...) => (VPORD512 ...)
 (OrInt64x2 ...) => (VPOR128 ...)
 (OrInt64x4 ...) => (VPOR256 ...)
 (OrInt64x8 ...) => (VPORQ512 ...)
-(OrInt8x16 ...) => (VPOR128 ...)
-(OrInt8x32 ...) => (VPOR256 ...)
-(OrUint16x16 ...) => (VPOR256 ...)
+(OrUint8x16 ...) => (VPOR128 ...)
+(OrUint8x32 ...) => (VPOR256 ...)
 (OrUint16x8 ...) => (VPOR128 ...)
-(OrUint32x16 ...) => (VPORD512 ...)
+(OrUint16x16 ...) => (VPOR256 ...)
 (OrUint32x4 ...) => (VPOR128 ...)
 (OrUint32x8 ...) => (VPOR256 ...)
+(OrUint32x16 ...) => (VPORD512 ...)
 (OrUint64x2 ...) => (VPOR128 ...)
 (OrUint64x4 ...) => (VPOR256 ...)
 (OrUint64x8 ...) => (VPORQ512 ...)
-(OrUint8x16 ...) => (VPOR128 ...)
-(OrUint8x32 ...) => (VPOR256 ...)
+(PairDotProdInt16x8 ...) => (VPMADDWD128 ...)
 (PairDotProdInt16x16 ...) => (VPMADDWD256 ...)
 (PairDotProdInt16x32 ...) => (VPMADDWD512 ...)
-(PairDotProdInt16x8 ...) => (VPMADDWD128 ...)
-(PairDotProdAccumulateInt32x16 ...) => (VPDPWSSD512 ...)
 (PairDotProdAccumulateInt32x4 ...) => (VPDPWSSD128 ...)
 (PairDotProdAccumulateInt32x8 ...) => (VPDPWSSD256 ...)
+(PairDotProdAccumulateInt32x16 ...) => (VPDPWSSD512 ...)
 (PairwiseAddFloat32x4 ...) => (VHADDPS128 ...)
 (PairwiseAddFloat32x8 ...) => (VHADDPS256 ...)
 (PairwiseAddFloat64x2 ...) => (VHADDPD128 ...)
 (PairwiseAddFloat64x4 ...) => (VHADDPD256 ...)
-(PairwiseAddInt16x16 ...) => (VPHADDW256 ...)
 (PairwiseAddInt16x8 ...) => (VPHADDW128 ...)
+(PairwiseAddInt16x16 ...) => (VPHADDW256 ...)
 (PairwiseAddInt32x4 ...) => (VPHADDD128 ...)
 (PairwiseAddInt32x8 ...) => (VPHADDD256 ...)
-(PairwiseAddUint16x16 ...) => (VPHADDW256 ...)
 (PairwiseAddUint16x8 ...) => (VPHADDW128 ...)
+(PairwiseAddUint16x16 ...) => (VPHADDW256 ...)
 (PairwiseAddUint32x4 ...) => (VPHADDD128 ...)
 (PairwiseAddUint32x8 ...) => (VPHADDD256 ...)
 (PairwiseSubFloat32x4 ...) => (VHSUBPS128 ...)
 (PairwiseSubFloat32x8 ...) => (VHSUBPS256 ...)
 (PairwiseSubFloat64x2 ...) => (VHSUBPD128 ...)
 (PairwiseSubFloat64x4 ...) => (VHSUBPD256 ...)
-(PairwiseSubInt16x16 ...) => (VPHSUBW256 ...)
 (PairwiseSubInt16x8 ...) => (VPHSUBW128 ...)
+(PairwiseSubInt16x16 ...) => (VPHSUBW256 ...)
 (PairwiseSubInt32x4 ...) => (VPHSUBD128 ...)
 (PairwiseSubInt32x8 ...) => (VPHSUBD256 ...)
-(PairwiseSubUint16x16 ...) => (VPHSUBW256 ...)
 (PairwiseSubUint16x8 ...) => (VPHSUBW128 ...)
+(PairwiseSubUint16x16 ...) => (VPHSUBW256 ...)
 (PairwiseSubUint32x4 ...) => (VPHSUBD128 ...)
 (PairwiseSubUint32x8 ...) => (VPHSUBD256 ...)
-(PopCountInt16x16 ...) => (VPOPCNTW256 ...)
-(PopCountInt16x32 ...) => (VPOPCNTW512 ...)
-(PopCountInt16x8 ...) => (VPOPCNTW128 ...)
-(PopCountInt32x16 ...) => (VPOPCNTD512 ...)
-(PopCountInt32x4 ...) => (VPOPCNTD128 ...)
-(PopCountInt32x8 ...) => (VPOPCNTD256 ...)
-(PopCountInt64x2 ...) => (VPOPCNTQ128 ...)
-(PopCountInt64x4 ...) => (VPOPCNTQ256 ...)
-(PopCountInt64x8 ...) => (VPOPCNTQ512 ...)
 (PopCountInt8x16 ...) => (VPOPCNTB128 ...)
 (PopCountInt8x32 ...) => (VPOPCNTB256 ...)
 (PopCountInt8x64 ...) => (VPOPCNTB512 ...)
-(PopCountUint16x16 ...) => (VPOPCNTW256 ...)
-(PopCountUint16x32 ...) => (VPOPCNTW512 ...)
-(PopCountUint16x8 ...) => (VPOPCNTW128 ...)
-(PopCountUint32x16 ...) => (VPOPCNTD512 ...)
-(PopCountUint32x4 ...) => (VPOPCNTD128 ...)
-(PopCountUint32x8 ...) => (VPOPCNTD256 ...)
-(PopCountUint64x2 ...) => (VPOPCNTQ128 ...)
-(PopCountUint64x4 ...) => (VPOPCNTQ256 ...)
-(PopCountUint64x8 ...) => (VPOPCNTQ512 ...)
+(PopCountInt16x8 ...) => (VPOPCNTW128 ...)
+(PopCountInt16x16 ...) => (VPOPCNTW256 ...)
+(PopCountInt16x32 ...) => (VPOPCNTW512 ...)
+(PopCountInt32x4 ...) => (VPOPCNTD128 ...)
+(PopCountInt32x8 ...) => (VPOPCNTD256 ...)
+(PopCountInt32x16 ...) => (VPOPCNTD512 ...)
+(PopCountInt64x2 ...) => (VPOPCNTQ128 ...)
+(PopCountInt64x4 ...) => (VPOPCNTQ256 ...)
+(PopCountInt64x8 ...) => (VPOPCNTQ512 ...)
 (PopCountUint8x16 ...) => (VPOPCNTB128 ...)
 (PopCountUint8x32 ...) => (VPOPCNTB256 ...)
 (PopCountUint8x64 ...) => (VPOPCNTB512 ...)
+(PopCountUint16x8 ...) => (VPOPCNTW128 ...)
+(PopCountUint16x16 ...) => (VPOPCNTW256 ...)
+(PopCountUint16x32 ...) => (VPOPCNTW512 ...)
+(PopCountUint32x4 ...) => (VPOPCNTD128 ...)
+(PopCountUint32x8 ...) => (VPOPCNTD256 ...)
+(PopCountUint32x16 ...) => (VPOPCNTD512 ...)
+(PopCountUint64x2 ...) => (VPOPCNTQ128 ...)
+(PopCountUint64x4 ...) => (VPOPCNTQ256 ...)
+(PopCountUint64x8 ...) => (VPOPCNTQ512 ...)
 (RoundFloat32x4 x) => (VROUNDPS128 [0] x)
 (RoundFloat32x8 x) => (VROUNDPS256 [0] x)
 (RoundFloat64x2 x) => (VROUNDPD128 [0] x)
 (RoundFloat64x4 x) => (VROUNDPD256 [0] x)
-(RoundSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+8] x)
 (RoundSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+8] x)
 (RoundSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+8] x)
+(RoundSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+8] x)
 (RoundSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+8] x)
 (RoundSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+8] x)
 (RoundSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+8] x)
-(RoundWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x)
 (RoundWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x)
 (RoundWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x)
+(RoundWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x)
 (RoundWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
 (RoundWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
 (RoundWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
-(SaturatedAddInt16x16 ...) => (VPADDSW256 ...)
-(SaturatedAddInt16x32 ...) => (VPADDSW512 ...)
-(SaturatedAddInt16x8 ...) => (VPADDSW128 ...)
 (SaturatedAddInt8x16 ...) => (VPADDSB128 ...)
 (SaturatedAddInt8x32 ...) => (VPADDSB256 ...)
 (SaturatedAddInt8x64 ...) => (VPADDSB512 ...)
-(SaturatedAddUint16x16 ...) => (VPADDSW256 ...)
-(SaturatedAddUint16x32 ...) => (VPADDSW512 ...)
-(SaturatedAddUint16x8 ...) => (VPADDSW128 ...)
+(SaturatedAddInt16x8 ...) => (VPADDSW128 ...)
+(SaturatedAddInt16x16 ...) => (VPADDSW256 ...)
+(SaturatedAddInt16x32 ...) => (VPADDSW512 ...)
 (SaturatedAddUint8x16 ...) => (VPADDSB128 ...)
 (SaturatedAddUint8x32 ...) => (VPADDSB256 ...)
 (SaturatedAddUint8x64 ...) => (VPADDSB512 ...)
-(SaturatedPairDotProdAccumulateInt32x16 ...) => (VPDPWSSDS512 ...)
+(SaturatedAddUint16x8 ...) => (VPADDSW128 ...)
+(SaturatedAddUint16x16 ...) => (VPADDSW256 ...)
+(SaturatedAddUint16x32 ...) => (VPADDSW512 ...)
 (SaturatedPairDotProdAccumulateInt32x4 ...) => (VPDPWSSDS128 ...)
 (SaturatedPairDotProdAccumulateInt32x8 ...) => (VPDPWSSDS256 ...)
-(SaturatedPairwiseAddInt16x16 ...) => (VPHADDSW256 ...)
+(SaturatedPairDotProdAccumulateInt32x16 ...) => (VPDPWSSDS512 ...)
 (SaturatedPairwiseAddInt16x8 ...) => (VPHADDSW128 ...)
-(SaturatedPairwiseSubInt16x16 ...) => (VPHSUBSW256 ...)
+(SaturatedPairwiseAddInt16x16 ...) => (VPHADDSW256 ...)
 (SaturatedPairwiseSubInt16x8 ...) => (VPHSUBSW128 ...)
-(SaturatedSubInt16x16 ...) => (VPSUBSW256 ...)
-(SaturatedSubInt16x32 ...) => (VPSUBSW512 ...)
-(SaturatedSubInt16x8 ...) => (VPSUBSW128 ...)
+(SaturatedPairwiseSubInt16x16 ...) => (VPHSUBSW256 ...)
 (SaturatedSubInt8x16 ...) => (VPSUBSB128 ...)
 (SaturatedSubInt8x32 ...) => (VPSUBSB256 ...)
 (SaturatedSubInt8x64 ...) => (VPSUBSB512 ...)
-(SaturatedSubUint16x16 ...) => (VPSUBSW256 ...)
-(SaturatedSubUint16x32 ...) => (VPSUBSW512 ...)
-(SaturatedSubUint16x8 ...) => (VPSUBSW128 ...)
+(SaturatedSubInt16x8 ...) => (VPSUBSW128 ...)
+(SaturatedSubInt16x16 ...) => (VPSUBSW256 ...)
+(SaturatedSubInt16x32 ...) => (VPSUBSW512 ...)
 (SaturatedSubUint8x16 ...) => (VPSUBSB128 ...)
 (SaturatedSubUint8x32 ...) => (VPSUBSB256 ...)
 (SaturatedSubUint8x64 ...) => (VPSUBSB512 ...)
+(SaturatedSubUint16x8 ...) => (VPSUBSW128 ...)
+(SaturatedSubUint16x16 ...) => (VPSUBSW256 ...)
+(SaturatedSubUint16x32 ...) => (VPSUBSW512 ...)
 (SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
 (SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
 (SaturatedUnsignedSignedPairDotProdUint8x64 ...) => (VPMADDUBSW512 ...)
-(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
 (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
 (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)
-(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...)
+(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
 (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSDS128 ...)
 (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSDS256 ...)
+(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...)
+(SetElemInt8x16 [a] x y) => (VPINSRB128 [a] x y)
 (SetElemInt16x8 [a] x y) => (VPINSRW128 [a] x y)
 (SetElemInt32x4 [a] x y) => (VPINSRD128 [a] x y)
 (SetElemInt64x2 [a] x y) => (VPINSRQ128 [a] x y)
-(SetElemInt8x16 [a] x y) => (VPINSRB128 [a] x y)
+(SetElemUint8x16 [a] x y) => (VPINSRB128 [a] x y)
 (SetElemUint16x8 [a] x y) => (VPINSRW128 [a] x y)
 (SetElemUint32x4 [a] x y) => (VPINSRD128 [a] x y)
 (SetElemUint64x2 [a] x y) => (VPINSRQ128 [a] x y)
-(SetElemUint8x16 [a] x y) => (VPINSRB128 [a] x y)
-(SignInt16x16 ...) => (VPSIGNW256 ...)
-(SignInt16x8 ...) => (VPSIGNW128 ...)
-(SignInt32x4 ...) => (VPSIGND128 ...)
-(SignInt32x8 ...) => (VPSIGND256 ...)
 (SignInt8x16 ...) => (VPSIGNB128 ...)
 (SignInt8x32 ...) => (VPSIGNB256 ...)
-(SqrtFloat32x16 ...) => (VSQRTPS512 ...)
+(SignInt16x8 ...) => (VPSIGNW128 ...)
+(SignInt16x16 ...) => (VPSIGNW256 ...)
+(SignInt32x4 ...) => (VPSIGND128 ...)
+(SignInt32x8 ...) => (VPSIGND256 ...)
 (SqrtFloat32x4 ...) => (VSQRTPS128 ...)
 (SqrtFloat32x8 ...) => (VSQRTPS256 ...)
+(SqrtFloat32x16 ...) => (VSQRTPS512 ...)
 (SqrtFloat64x2 ...) => (VSQRTPD128 ...)
 (SqrtFloat64x4 ...) => (VSQRTPD256 ...)
 (SqrtFloat64x8 ...) => (VSQRTPD512 ...)
-(SubFloat32x16 ...) => (VSUBPS512 ...)
 (SubFloat32x4 ...) => (VSUBPS128 ...)
 (SubFloat32x8 ...) => (VSUBPS256 ...)
+(SubFloat32x16 ...) => (VSUBPS512 ...)
 (SubFloat64x2 ...) => (VSUBPD128 ...)
 (SubFloat64x4 ...) => (VSUBPD256 ...)
 (SubFloat64x8 ...) => (VSUBPD512 ...)
-(SubInt16x16 ...) => (VPSUBW256 ...)
-(SubInt16x32 ...) => (VPSUBW512 ...)
-(SubInt16x8 ...) => (VPSUBW128 ...)
-(SubInt32x16 ...) => (VPSUBD512 ...)
-(SubInt32x4 ...) => (VPSUBD128 ...)
-(SubInt32x8 ...) => (VPSUBD256 ...)
-(SubInt64x2 ...) => (VPSUBQ128 ...)
-(SubInt64x4 ...) => (VPSUBQ256 ...)
-(SubInt64x8 ...) => (VPSUBQ512 ...)
 (SubInt8x16 ...) => (VPSUBB128 ...)
 (SubInt8x32 ...) => (VPSUBB256 ...)
 (SubInt8x64 ...) => (VPSUBB512 ...)
-(SubUint16x16 ...) => (VPSUBW256 ...)
-(SubUint16x32 ...) => (VPSUBW512 ...)
-(SubUint16x8 ...) => (VPSUBW128 ...)
-(SubUint32x16 ...) => (VPSUBD512 ...)
-(SubUint32x4 ...) => (VPSUBD128 ...)
-(SubUint32x8 ...) => (VPSUBD256 ...)
-(SubUint64x2 ...) => (VPSUBQ128 ...)
-(SubUint64x4 ...) => (VPSUBQ256 ...)
-(SubUint64x8 ...) => (VPSUBQ512 ...)
+(SubInt16x8 ...) => (VPSUBW128 ...)
+(SubInt16x16 ...) => (VPSUBW256 ...)
+(SubInt16x32 ...) => (VPSUBW512 ...)
+(SubInt32x4 ...) => (VPSUBD128 ...)
+(SubInt32x8 ...) => (VPSUBD256 ...)
+(SubInt32x16 ...) => (VPSUBD512 ...)
+(SubInt64x2 ...) => (VPSUBQ128 ...)
+(SubInt64x4 ...) => (VPSUBQ256 ...)
+(SubInt64x8 ...) => (VPSUBQ512 ...)
 (SubUint8x16 ...) => (VPSUBB128 ...)
 (SubUint8x32 ...) => (VPSUBB256 ...)
 (SubUint8x64 ...) => (VPSUBB512 ...)
+(SubUint16x8 ...) => (VPSUBW128 ...)
+(SubUint16x16 ...) => (VPSUBW256 ...)
+(SubUint16x32 ...) => (VPSUBW512 ...)
+(SubUint32x4 ...) => (VPSUBD128 ...)
+(SubUint32x8 ...) => (VPSUBD256 ...)
+(SubUint32x16 ...) => (VPSUBD512 ...)
+(SubUint64x2 ...) => (VPSUBQ128 ...)
+(SubUint64x4 ...) => (VPSUBQ256 ...)
+(SubUint64x8 ...) => (VPSUBQ512 ...)
 (TruncFloat32x4 x) => (VROUNDPS128 [3] x)
 (TruncFloat32x8 x) => (VROUNDPS256 [3] x)
 (TruncFloat64x2 x) => (VROUNDPD128 [3] x)
 (TruncFloat64x4 x) => (VROUNDPD256 [3] x)
-(TruncSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+11] x)
 (TruncSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+11] x)
 (TruncSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+11] x)
+(TruncSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+11] x)
 (TruncSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+11] x)
 (TruncSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+11] x)
 (TruncSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+11] x)
-(TruncWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+3] x)
 (TruncWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+3] x)
 (TruncWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+3] x)
+(TruncWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+3] x)
 (TruncWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
 (TruncWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
 (TruncWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
-(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
 (UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...)
 (UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...)
-(UnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSD512 ...)
+(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
 (UnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSD128 ...)
 (UnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSD256 ...)
-(XorFloat32x16 ...) => (VXORPS512 ...)
+(UnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSD512 ...)
 (XorFloat32x4 ...) => (VXORPS128 ...)
 (XorFloat32x8 ...) => (VXORPS256 ...)
+(XorFloat32x16 ...) => (VXORPS512 ...)
 (XorFloat64x2 ...) => (VXORPD128 ...)
 (XorFloat64x4 ...) => (VXORPD256 ...)
 (XorFloat64x8 ...) => (VXORPD512 ...)
-(XorInt16x16 ...) => (VPXOR256 ...)
+(XorInt8x16 ...) => (VPXOR128 ...)
+(XorInt8x32 ...) => (VPXOR256 ...)
 (XorInt16x8 ...) => (VPXOR128 ...)
-(XorInt32x16 ...) => (VPXORD512 ...)
+(XorInt16x16 ...) => (VPXOR256 ...)
 (XorInt32x4 ...) => (VPXOR128 ...)
 (XorInt32x8 ...) => (VPXOR256 ...)
+(XorInt32x16 ...) => (VPXORD512 ...)
 (XorInt64x2 ...) => (VPXOR128 ...)
 (XorInt64x4 ...) => (VPXOR256 ...)
 (XorInt64x8 ...) => (VPXORQ512 ...)
-(XorInt8x16 ...) => (VPXOR128 ...)
-(XorInt8x32 ...) => (VPXOR256 ...)
-(XorUint16x16 ...) => (VPXOR256 ...)
+(XorUint8x16 ...) => (VPXOR128 ...)
+(XorUint8x32 ...) => (VPXOR256 ...)
 (XorUint16x8 ...) => (VPXOR128 ...)
-(XorUint32x16 ...) => (VPXORD512 ...)
+(XorUint16x16 ...) => (VPXOR256 ...)
 (XorUint32x4 ...) => (VPXOR128 ...)
 (XorUint32x8 ...) => (VPXOR256 ...)
+(XorUint32x16 ...) => (VPXORD512 ...)
 (XorUint64x2 ...) => (VPXOR128 ...)
 (XorUint64x4 ...) => (VPXOR256 ...)
 (XorUint64x8 ...) => (VPXORQ512 ...)
-(XorUint8x16 ...) => (VPXOR128 ...)
-(XorUint8x32 ...) => (VPXOR256 ...)
diff --git a/src/simd/simd_wrapped_test.go b/src/simd/simd_wrapped_test.go
index 8761097c44e..b5f6bb517a3 100644
--- a/src/simd/simd_wrapped_test.go
+++ b/src/simd/simd_wrapped_test.go
@@ -9,258 +9,6 @@ import (
 	"testing"
 )
 
-func testFloat32x16Binary(t *testing.T, v0 []float32, v1 []float32, want []float32, which string) {
-	t.Helper()
-	var gotv simd.Float32x16
-	got := make([]float32, len(want))
-	vec0 := simd.LoadFloat32x16Slice(v0)
-	vec1 := simd.LoadFloat32x16Slice(v1)
-	switch which {
-	case "Add":
-		gotv = vec0.Add(vec1)
-	case "And":
-		gotv = vec0.And(vec1)
-	case "AndNot":
-		gotv = vec0.AndNot(vec1)
-	case "Div":
-		gotv = vec0.Div(vec1)
-	case "Max":
-		gotv = vec0.Max(vec1)
-	case "Min":
-		gotv = vec0.Min(vec1)
-	case "Mul":
-		gotv = vec0.Mul(vec1)
-	case "MulByPowOf2":
-		gotv = vec0.MulByPowOf2(vec1)
-	case "Or":
-		gotv = vec0.Or(vec1)
-	case "Sub":
-		gotv = vec0.Sub(vec1)
-	case "Xor":
-		gotv = vec0.Xor(vec1)
-
-	default:
-		t.Errorf("Unknown method: Float32x16.%s", which)
-	}
-	gotv.StoreSlice(got)
-	for i := range len(want) {
-		if got[i] != want[i] {
-			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
-		}
-	}
-}
-
-func testFloat32x16BinaryMasked(t *testing.T, v0 []float32, v1 []float32, v2 []int32, want []float32, which string) {
-	t.Helper()
-	var gotv simd.Float32x16
-	got := make([]float32, len(want))
-	vec0 := simd.LoadFloat32x16Slice(v0)
-	vec1 := simd.LoadFloat32x16Slice(v1)
-	vec2 := simd.LoadInt32x16Slice(v2)
-	switch which {
-	case "MaskedAdd":
-		gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x16())
-	case "MaskedAnd":
-		gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x16())
-	case "MaskedAndNot":
-		gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x16())
-	case "MaskedDiv":
-		gotv = vec0.MaskedDiv(vec1, vec2.AsMask32x16())
-	case "MaskedMax":
-		gotv = vec0.MaskedMax(vec1, vec2.AsMask32x16())
-	case "MaskedMin":
-		gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16())
-	case "MaskedMul":
-		gotv = vec0.MaskedMul(vec1, vec2.AsMask32x16())
-	case "MaskedMulByPowOf2":
-		gotv = vec0.MaskedMulByPowOf2(vec1, vec2.AsMask32x16())
-	case "MaskedOr":
-		gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16())
-	case "MaskedSub":
-		gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16())
-	case "MaskedXor":
-		gotv = vec0.MaskedXor(vec1, vec2.AsMask32x16())
-
-	default:
-		t.Errorf("Unknown method: Float32x16.%s", which)
-	}
-	gotv.StoreSlice(got)
-	for i := range len(want) {
-		if got[i] != want[i] {
-			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
-		}
-	}
-}
-
-func testFloat32x16Compare(t *testing.T, v0 []float32, v1 []float32, want []int32, which string) {
-	t.Helper()
-	var gotv simd.Int32x16
-	got := make([]int32, len(want))
-	vec0 := simd.LoadFloat32x16Slice(v0)
-	vec1 := simd.LoadFloat32x16Slice(v1)
-	switch which {
-	case "Equal":
-		gotv = vec0.Equal(vec1).AsInt32x16()
case "Greater": - gotv = vec0.Greater(vec1).AsInt32x16() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt32x16() - case "IsNan": - gotv = vec0.IsNan(vec1).AsInt32x16() - case "Less": - gotv = vec0.Less(vec1).AsInt32x16() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt32x16() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt32x16() - - default: - t.Errorf("Unknown method: Float32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testFloat32x16MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadFloat32x16Slice(v0) - vec1 := simd.LoadFloat32x16Slice(v1) - vec2 := simd.LoadInt32x16Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedIsNan": - gotv = vec0.MaskedIsNan(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - - default: - t.Errorf("Unknown method: Float32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testFloat32x16Ternary(t *testing.T, v0 []float32, v1 []float32, v2 []float32, want []float32, which string) { - t.Helper() - var gotv simd.Float32x16 - got := make([]float32, len(want)) - vec0 := simd.LoadFloat32x16Slice(v0) - vec1 := simd.LoadFloat32x16Slice(v1) - vec2 := simd.LoadFloat32x16Slice(v2) - switch which { - case "FusedMultiplyAdd": - gotv = vec0.FusedMultiplyAdd(vec1, vec2) - case "FusedMultiplyAddSub": - gotv = vec0.FusedMultiplyAddSub(vec1, vec2) - case "FusedMultiplySubAdd": - gotv = vec0.FusedMultiplySubAdd(vec1, vec2) - - default: - t.Errorf("Unknown method: Float32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testFloat32x16TernaryMasked(t *testing.T, v0 []float32, v1 []float32, v2 []float32, v3 []int32, want []float32, which string) { - t.Helper() - var gotv simd.Float32x16 - got := make([]float32, len(want)) - vec0 := simd.LoadFloat32x16Slice(v0) - vec1 := simd.LoadFloat32x16Slice(v1) - vec2 := simd.LoadFloat32x16Slice(v2) - vec3 := simd.LoadInt32x16Slice(v3) - switch which { - case "MaskedFusedMultiplyAdd": - gotv = vec0.MaskedFusedMultiplyAdd(vec1, vec2, vec3.AsMask32x16()) - case "MaskedFusedMultiplyAddSub": - gotv = vec0.MaskedFusedMultiplyAddSub(vec1, vec2, vec3.AsMask32x16()) - case "MaskedFusedMultiplySubAdd": - gotv = vec0.MaskedFusedMultiplySubAdd(vec1, vec2, vec3.AsMask32x16()) - - default: - t.Errorf("Unknown method: Float32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testFloat32x16Unary(t *testing.T, v0 []float32, want 
[]float32, which string) { - t.Helper() - var gotv simd.Float32x16 - got := make([]float32, len(want)) - vec0 := simd.LoadFloat32x16Slice(v0) - switch which { - case "ApproximateReciprocal": - gotv = vec0.ApproximateReciprocal() - case "ApproximateReciprocalOfSqrt": - gotv = vec0.ApproximateReciprocalOfSqrt() - case "Sqrt": - gotv = vec0.Sqrt() - - default: - t.Errorf("Unknown method: Float32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testFloat32x16UnaryMasked(t *testing.T, v0 []float32, v1 []int32, want []float32, which string) { - t.Helper() - var gotv simd.Float32x16 - got := make([]float32, len(want)) - vec0 := simd.LoadFloat32x16Slice(v0) - vec1 := simd.LoadInt32x16Slice(v1) - switch which { - case "MaskedApproximateReciprocal": - gotv = vec0.MaskedApproximateReciprocal(vec1.AsMask32x16()) - case "MaskedApproximateReciprocalOfSqrt": - gotv = vec0.MaskedApproximateReciprocalOfSqrt(vec1.AsMask32x16()) - case "MaskedSqrt": - gotv = vec0.MaskedSqrt(vec1.AsMask32x16()) - - default: - t.Errorf("Unknown method: Float32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - func testFloat32x4Binary(t *testing.T, v0 []float32, v1 []float32, want []float32, which string) { t.Helper() var gotv simd.Float32x4 @@ -793,6 +541,258 @@ func testFloat32x8UnaryMasked(t *testing.T, v0 []float32, v1 []int32, want []flo } } +func testFloat32x16Binary(t *testing.T, v0 []float32, v1 []float32, want []float32, which string) { + t.Helper() + var gotv simd.Float32x16 + got := make([]float32, len(want)) + vec0 := simd.LoadFloat32x16Slice(v0) + vec1 := simd.LoadFloat32x16Slice(v1) + switch which { + case "Add": + gotv = vec0.Add(vec1) + case "And": + gotv = vec0.And(vec1) + case "AndNot": + gotv = vec0.AndNot(vec1) + case "Div": + gotv = vec0.Div(vec1) + case "Max": + gotv = vec0.Max(vec1) + case "Min": + gotv = vec0.Min(vec1) + case "Mul": + gotv = vec0.Mul(vec1) + case "MulByPowOf2": + gotv = vec0.MulByPowOf2(vec1) + case "Or": + gotv = vec0.Or(vec1) + case "Sub": + gotv = vec0.Sub(vec1) + case "Xor": + gotv = vec0.Xor(vec1) + + default: + t.Errorf("Unknown method: Float32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testFloat32x16BinaryMasked(t *testing.T, v0 []float32, v1 []float32, v2 []int32, want []float32, which string) { + t.Helper() + var gotv simd.Float32x16 + got := make([]float32, len(want)) + vec0 := simd.LoadFloat32x16Slice(v0) + vec1 := simd.LoadFloat32x16Slice(v1) + vec2 := simd.LoadInt32x16Slice(v2) + switch which { + case "MaskedAdd": + gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x16()) + case "MaskedAnd": + gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x16()) + case "MaskedAndNot": + gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x16()) + case "MaskedDiv": + gotv = vec0.MaskedDiv(vec1, vec2.AsMask32x16()) + case "MaskedMax": + gotv = vec0.MaskedMax(vec1, vec2.AsMask32x16()) + case "MaskedMin": + gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16()) + case "MaskedMul": + gotv = vec0.MaskedMul(vec1, vec2.AsMask32x16()) + case "MaskedMulByPowOf2": + gotv = vec0.MaskedMulByPowOf2(vec1, vec2.AsMask32x16()) + case "MaskedOr": + gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16()) + case "MaskedSub": + gotv = 
vec0.MaskedSub(vec1, vec2.AsMask32x16()) + case "MaskedXor": + gotv = vec0.MaskedXor(vec1, vec2.AsMask32x16()) + + default: + t.Errorf("Unknown method: Float32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testFloat32x16Compare(t *testing.T, v0 []float32, v1 []float32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadFloat32x16Slice(v0) + vec1 := simd.LoadFloat32x16Slice(v1) + switch which { + case "Equal": + gotv = vec0.Equal(vec1).AsInt32x16() + case "Greater": + gotv = vec0.Greater(vec1).AsInt32x16() + case "GreaterEqual": + gotv = vec0.GreaterEqual(vec1).AsInt32x16() + case "IsNan": + gotv = vec0.IsNan(vec1).AsInt32x16() + case "Less": + gotv = vec0.Less(vec1).AsInt32x16() + case "LessEqual": + gotv = vec0.LessEqual(vec1).AsInt32x16() + case "NotEqual": + gotv = vec0.NotEqual(vec1).AsInt32x16() + + default: + t.Errorf("Unknown method: Float32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testFloat32x16MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadFloat32x16Slice(v0) + vec1 := simd.LoadFloat32x16Slice(v1) + vec2 := simd.LoadInt32x16Slice(v2) + switch which { + case "MaskedEqual": + gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedGreater": + gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedGreaterEqual": + gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedIsNan": + gotv = vec0.MaskedIsNan(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedLess": + gotv = vec0.MaskedLess(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedLessEqual": + gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedNotEqual": + gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + + default: + t.Errorf("Unknown method: Float32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testFloat32x16Ternary(t *testing.T, v0 []float32, v1 []float32, v2 []float32, want []float32, which string) { + t.Helper() + var gotv simd.Float32x16 + got := make([]float32, len(want)) + vec0 := simd.LoadFloat32x16Slice(v0) + vec1 := simd.LoadFloat32x16Slice(v1) + vec2 := simd.LoadFloat32x16Slice(v2) + switch which { + case "FusedMultiplyAdd": + gotv = vec0.FusedMultiplyAdd(vec1, vec2) + case "FusedMultiplyAddSub": + gotv = vec0.FusedMultiplyAddSub(vec1, vec2) + case "FusedMultiplySubAdd": + gotv = vec0.FusedMultiplySubAdd(vec1, vec2) + + default: + t.Errorf("Unknown method: Float32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testFloat32x16TernaryMasked(t *testing.T, v0 []float32, v1 []float32, v2 []float32, v3 []int32, want []float32, which string) { + t.Helper() + var gotv simd.Float32x16 + got := make([]float32, len(want)) + vec0 := simd.LoadFloat32x16Slice(v0) + vec1 := simd.LoadFloat32x16Slice(v1) + vec2 := 
+	vec2 := simd.LoadFloat32x16Slice(v2)
+	vec3 := simd.LoadInt32x16Slice(v3)
+	switch which {
+	case "MaskedFusedMultiplyAdd":
+		gotv = vec0.MaskedFusedMultiplyAdd(vec1, vec2, vec3.AsMask32x16())
+	case "MaskedFusedMultiplyAddSub":
+		gotv = vec0.MaskedFusedMultiplyAddSub(vec1, vec2, vec3.AsMask32x16())
+	case "MaskedFusedMultiplySubAdd":
+		gotv = vec0.MaskedFusedMultiplySubAdd(vec1, vec2, vec3.AsMask32x16())
+
+	default:
+		t.Errorf("Unknown method: Float32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testFloat32x16Unary(t *testing.T, v0 []float32, want []float32, which string) {
+	t.Helper()
+	var gotv simd.Float32x16
+	got := make([]float32, len(want))
+	vec0 := simd.LoadFloat32x16Slice(v0)
+	switch which {
+	case "ApproximateReciprocal":
+		gotv = vec0.ApproximateReciprocal()
+	case "ApproximateReciprocalOfSqrt":
+		gotv = vec0.ApproximateReciprocalOfSqrt()
+	case "Sqrt":
+		gotv = vec0.Sqrt()
+
+	default:
+		t.Errorf("Unknown method: Float32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testFloat32x16UnaryMasked(t *testing.T, v0 []float32, v1 []int32, want []float32, which string) {
+	t.Helper()
+	var gotv simd.Float32x16
+	got := make([]float32, len(want))
+	vec0 := simd.LoadFloat32x16Slice(v0)
+	vec1 := simd.LoadInt32x16Slice(v1)
+	switch which {
+	case "MaskedApproximateReciprocal":
+		gotv = vec0.MaskedApproximateReciprocal(vec1.AsMask32x16())
+	case "MaskedApproximateReciprocalOfSqrt":
+		gotv = vec0.MaskedApproximateReciprocalOfSqrt(vec1.AsMask32x16())
+	case "MaskedSqrt":
+		gotv = vec0.MaskedSqrt(vec1.AsMask32x16())
+
+	default:
+		t.Errorf("Unknown method: Float32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
 func testFloat64x2Binary(t *testing.T, v0 []float64, v1 []float64, want []float64, which string) {
 	t.Helper()
 	var gotv simd.Float64x2
@@ -1579,6 +1579,779 @@ func testFloat64x8UnaryMasked(t *testing.T, v0 []float64, v1 []int64, want []flo
 	}
 }
 
+func testInt8x16Binary(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x16
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x16Slice(v0)
+	vec1 := simd.LoadInt8x16Slice(v1)
+	switch which {
+	case "Add":
+		gotv = vec0.Add(vec1)
+	case "And":
+		gotv = vec0.And(vec1)
+	case "AndNot":
+		gotv = vec0.AndNot(vec1)
+	case "Max":
+		gotv = vec0.Max(vec1)
+	case "Min":
+		gotv = vec0.Min(vec1)
+	case "Or":
+		gotv = vec0.Or(vec1)
+	case "SaturatedAdd":
+		gotv = vec0.SaturatedAdd(vec1)
+	case "SaturatedSub":
+		gotv = vec0.SaturatedSub(vec1)
+	case "Sign":
+		gotv = vec0.Sign(vec1)
+	case "Sub":
+		gotv = vec0.Sub(vec1)
+	case "Xor":
+		gotv = vec0.Xor(vec1)
+
+	default:
+		t.Errorf("Unknown method: Int8x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x16BinaryMasked(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x16
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x16Slice(v0)
+	vec1 := simd.LoadInt8x16Slice(v1)
+	vec2 := simd.LoadInt8x16Slice(v2)
+	switch which {
+	case "MaskedAdd":
+		gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x16())
+	case "MaskedMax":
+		gotv = vec0.MaskedMax(vec1, vec2.AsMask8x16())
+	case "MaskedMin":
+		gotv = vec0.MaskedMin(vec1, vec2.AsMask8x16())
+	case "MaskedSaturatedAdd":
+		gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x16())
+	case "MaskedSaturatedSub":
+		gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x16())
+	case "MaskedSub":
+		gotv = vec0.MaskedSub(vec1, vec2.AsMask8x16())
+
+	default:
+		t.Errorf("Unknown method: Int8x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x16Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x16
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x16Slice(v0)
+	vec1 := simd.LoadInt8x16Slice(v1)
+	switch which {
+	case "Equal":
+		gotv = vec0.Equal(vec1).AsInt8x16()
+	case "Greater":
+		gotv = vec0.Greater(vec1).AsInt8x16()
+	case "GreaterEqual":
+		gotv = vec0.GreaterEqual(vec1).AsInt8x16()
+	case "Less":
+		gotv = vec0.Less(vec1).AsInt8x16()
+	case "LessEqual":
+		gotv = vec0.LessEqual(vec1).AsInt8x16()
+	case "NotEqual":
+		gotv = vec0.NotEqual(vec1).AsInt8x16()
+
+	default:
+		t.Errorf("Unknown method: Int8x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x16MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x16
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x16Slice(v0)
+	vec1 := simd.LoadInt8x16Slice(v1)
+	vec2 := simd.LoadInt8x16Slice(v2)
+	switch which {
+	case "MaskedEqual":
+		gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x16()).AsInt8x16()
+	case "MaskedGreater":
+		gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x16()).AsInt8x16()
+	case "MaskedGreaterEqual":
+		gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x16()).AsInt8x16()
+	case "MaskedLess":
+		gotv = vec0.MaskedLess(vec1, vec2.AsMask8x16()).AsInt8x16()
+	case "MaskedLessEqual":
+		gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x16()).AsInt8x16()
+	case "MaskedNotEqual":
+		gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x16()).AsInt8x16()
+
+	default:
+		t.Errorf("Unknown method: Int8x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x16Unary(t *testing.T, v0 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x16
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x16Slice(v0)
+	switch which {
+	case "Absolute":
+		gotv = vec0.Absolute()
+	case "PopCount":
+		gotv = vec0.PopCount()
+
+	default:
+		t.Errorf("Unknown method: Int8x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x16UnaryMasked(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x16
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x16Slice(v0)
+	vec1 := simd.LoadInt8x16Slice(v1)
+	switch which {
+	case "MaskedAbsolute":
+		gotv = vec0.MaskedAbsolute(vec1.AsMask8x16())
+	case "MaskedPopCount":
+		gotv = vec0.MaskedPopCount(vec1.AsMask8x16())
+
+	default:
+		t.Errorf("Unknown method: Int8x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x32Binary(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x32
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x32Slice(v0)
+	vec1 := simd.LoadInt8x32Slice(v1)
+	switch which {
+	case "Add":
+		gotv = vec0.Add(vec1)
+	case "And":
+		gotv = vec0.And(vec1)
+	case "AndNot":
+		gotv = vec0.AndNot(vec1)
+	case "Max":
+		gotv = vec0.Max(vec1)
+	case "Min":
+		gotv = vec0.Min(vec1)
+	case "Or":
+		gotv = vec0.Or(vec1)
+	case "SaturatedAdd":
+		gotv = vec0.SaturatedAdd(vec1)
+	case "SaturatedSub":
+		gotv = vec0.SaturatedSub(vec1)
+	case "Sign":
+		gotv = vec0.Sign(vec1)
+	case "Sub":
+		gotv = vec0.Sub(vec1)
+	case "Xor":
+		gotv = vec0.Xor(vec1)
+
+	default:
+		t.Errorf("Unknown method: Int8x32.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x32BinaryMasked(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x32
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x32Slice(v0)
+	vec1 := simd.LoadInt8x32Slice(v1)
+	vec2 := simd.LoadInt8x32Slice(v2)
+	switch which {
+	case "MaskedAdd":
+		gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x32())
+	case "MaskedMax":
+		gotv = vec0.MaskedMax(vec1, vec2.AsMask8x32())
+	case "MaskedMin":
+		gotv = vec0.MaskedMin(vec1, vec2.AsMask8x32())
+	case "MaskedSaturatedAdd":
+		gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x32())
+	case "MaskedSaturatedSub":
+		gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x32())
+	case "MaskedSub":
+		gotv = vec0.MaskedSub(vec1, vec2.AsMask8x32())
+
+	default:
+		t.Errorf("Unknown method: Int8x32.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x32Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x32
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x32Slice(v0)
+	vec1 := simd.LoadInt8x32Slice(v1)
+	switch which {
+	case "Equal":
+		gotv = vec0.Equal(vec1).AsInt8x32()
+	case "Greater":
+		gotv = vec0.Greater(vec1).AsInt8x32()
+	case "GreaterEqual":
+		gotv = vec0.GreaterEqual(vec1).AsInt8x32()
+	case "Less":
+		gotv = vec0.Less(vec1).AsInt8x32()
+	case "LessEqual":
+		gotv = vec0.LessEqual(vec1).AsInt8x32()
+	case "NotEqual":
+		gotv = vec0.NotEqual(vec1).AsInt8x32()
+
+	default:
+		t.Errorf("Unknown method: Int8x32.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x32MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x32
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x32Slice(v0)
+	vec1 := simd.LoadInt8x32Slice(v1)
+	vec2 := simd.LoadInt8x32Slice(v2)
+	switch which {
+	case "MaskedEqual":
+		gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x32()).AsInt8x32()
+	case "MaskedGreater":
+		gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x32()).AsInt8x32()
+	case "MaskedGreaterEqual":
+		gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x32()).AsInt8x32()
+	case "MaskedLess":
+		gotv = vec0.MaskedLess(vec1, vec2.AsMask8x32()).AsInt8x32()
+	case "MaskedLessEqual":
+		gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x32()).AsInt8x32()
+	case "MaskedNotEqual":
+		gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x32()).AsInt8x32()
+
+	default:
+		t.Errorf("Unknown method: Int8x32.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x32Unary(t *testing.T, v0 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x32
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x32Slice(v0)
+	switch which {
+	case "Absolute":
+		gotv = vec0.Absolute()
+	case "PopCount":
+		gotv = vec0.PopCount()
+
+	default:
+		t.Errorf("Unknown method: Int8x32.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x32UnaryMasked(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x32
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x32Slice(v0)
+	vec1 := simd.LoadInt8x32Slice(v1)
+	switch which {
+	case "MaskedAbsolute":
+		gotv = vec0.MaskedAbsolute(vec1.AsMask8x32())
+	case "MaskedPopCount":
+		gotv = vec0.MaskedPopCount(vec1.AsMask8x32())
+
+	default:
+		t.Errorf("Unknown method: Int8x32.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x64Binary(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x64
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x64Slice(v0)
+	vec1 := simd.LoadInt8x64Slice(v1)
+	switch which {
+	case "Add":
+		gotv = vec0.Add(vec1)
+	case "Max":
+		gotv = vec0.Max(vec1)
+	case "Min":
+		gotv = vec0.Min(vec1)
+	case "SaturatedAdd":
+		gotv = vec0.SaturatedAdd(vec1)
+	case "SaturatedSub":
+		gotv = vec0.SaturatedSub(vec1)
+	case "Sub":
+		gotv = vec0.Sub(vec1)
+
+	default:
+		t.Errorf("Unknown method: Int8x64.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x64BinaryMasked(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x64
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x64Slice(v0)
+	vec1 := simd.LoadInt8x64Slice(v1)
+	vec2 := simd.LoadInt8x64Slice(v2)
+	switch which {
+	case "MaskedAdd":
+		gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x64())
+	case "MaskedMax":
+		gotv = vec0.MaskedMax(vec1, vec2.AsMask8x64())
+	case "MaskedMin":
+		gotv = vec0.MaskedMin(vec1, vec2.AsMask8x64())
+	case "MaskedSaturatedAdd":
+		gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x64())
+	case "MaskedSaturatedSub":
+		gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x64())
+	case "MaskedSub":
+		gotv = vec0.MaskedSub(vec1, vec2.AsMask8x64())
+
+	default:
+		t.Errorf("Unknown method: Int8x64.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x64Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x64
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x64Slice(v0)
+	vec1 := simd.LoadInt8x64Slice(v1)
+	switch which {
+	case "Equal":
+		gotv = vec0.Equal(vec1).AsInt8x64()
+	case "Greater":
+		gotv = vec0.Greater(vec1).AsInt8x64()
+	case "GreaterEqual":
+		gotv = vec0.GreaterEqual(vec1).AsInt8x64()
+	case "Less":
+		gotv = vec0.Less(vec1).AsInt8x64()
+	case "LessEqual":
+		gotv = vec0.LessEqual(vec1).AsInt8x64()
+	case "NotEqual":
+		gotv = vec0.NotEqual(vec1).AsInt8x64()
+
+	default:
+		t.Errorf("Unknown method: Int8x64.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x64MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x64
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x64Slice(v0)
+	vec1 := simd.LoadInt8x64Slice(v1)
+	vec2 := simd.LoadInt8x64Slice(v2)
+	switch which {
+	case "MaskedEqual":
+		gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x64()).AsInt8x64()
+	case "MaskedGreater":
+		gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x64()).AsInt8x64()
+	case "MaskedGreaterEqual":
+		gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x64()).AsInt8x64()
+	case "MaskedLess":
+		gotv = vec0.MaskedLess(vec1, vec2.AsMask8x64()).AsInt8x64()
+	case "MaskedLessEqual":
+		gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x64()).AsInt8x64()
+	case "MaskedNotEqual":
+		gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x64()).AsInt8x64()
+
+	default:
+		t.Errorf("Unknown method: Int8x64.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x64Unary(t *testing.T, v0 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x64
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x64Slice(v0)
+	switch which {
+	case "Absolute":
+		gotv = vec0.Absolute()
+	case "PopCount":
+		gotv = vec0.PopCount()
+
+	default:
+		t.Errorf("Unknown method: Int8x64.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt8x64UnaryMasked(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) {
+	t.Helper()
+	var gotv simd.Int8x64
+	got := make([]int8, len(want))
+	vec0 := simd.LoadInt8x64Slice(v0)
+	vec1 := simd.LoadInt8x64Slice(v1)
+	switch which {
+	case "MaskedAbsolute":
+		gotv = vec0.MaskedAbsolute(vec1.AsMask8x64())
+	case "MaskedPopCount":
+		gotv = vec0.MaskedPopCount(vec1.AsMask8x64())
+
+	default:
+		t.Errorf("Unknown method: Int8x64.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt16x8Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) {
+	t.Helper()
+	var gotv simd.Int16x8
+	got := make([]int16, len(want))
+	vec0 := simd.LoadInt16x8Slice(v0)
+	vec1 := simd.LoadInt16x8Slice(v1)
+	switch which {
+	case "Add":
+		gotv = vec0.Add(vec1)
+	case "And":
+		gotv = vec0.And(vec1)
+	case "AndNot":
+		gotv = vec0.AndNot(vec1)
+	case "Max":
+		gotv = vec0.Max(vec1)
+	case "Min":
+		gotv = vec0.Min(vec1)
+	case "MulHigh":
+		gotv = vec0.MulHigh(vec1)
+	case "MulLow":
+		gotv = vec0.MulLow(vec1)
+	case "Or":
+		gotv = vec0.Or(vec1)
+	case "PairwiseAdd":
+		gotv = vec0.PairwiseAdd(vec1)
+	case "PairwiseSub":
+		gotv = vec0.PairwiseSub(vec1)
+	case "SaturatedAdd":
+		gotv = vec0.SaturatedAdd(vec1)
+	case "SaturatedPairwiseAdd":
+		gotv = vec0.SaturatedPairwiseAdd(vec1)
+	case "SaturatedPairwiseSub":
+		gotv = vec0.SaturatedPairwiseSub(vec1)
+	case "SaturatedSub":
+		gotv = vec0.SaturatedSub(vec1)
+	case "Sign":
+		gotv = vec0.Sign(vec1)
+	case "Sub":
+		gotv = vec0.Sub(vec1)
+	case "Xor":
+		gotv = vec0.Xor(vec1)
+
+	default:
+		t.Errorf("Unknown method: Int16x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt16x8BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) {
+	t.Helper()
+	var gotv simd.Int16x8
+	got := make([]int16, len(want))
+	vec0 := simd.LoadInt16x8Slice(v0)
+	vec1 := simd.LoadInt16x8Slice(v1)
+	vec2 := simd.LoadInt16x8Slice(v2)
+	switch which {
+	case "MaskedAdd":
+		gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x8())
+	case "MaskedMax":
+		gotv = vec0.MaskedMax(vec1, vec2.AsMask16x8())
+	case "MaskedMin":
+		gotv = vec0.MaskedMin(vec1, vec2.AsMask16x8())
+	case "MaskedMulHigh":
+		gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x8())
+	case "MaskedMulLow":
+		gotv = vec0.MaskedMulLow(vec1, vec2.AsMask16x8())
+	case "MaskedSaturatedAdd":
+		gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x8())
+	case "MaskedSaturatedSub":
+		gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x8())
+	case "MaskedSub":
+		gotv = vec0.MaskedSub(vec1, vec2.AsMask16x8())
+
+	default:
+		t.Errorf("Unknown method: Int16x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt16x8BinaryMaskedWiden(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int32, which string) {
+	t.Helper()
+	var gotv simd.Int32x4
+	got := make([]int32, len(want))
+	vec0 := simd.LoadInt16x8Slice(v0)
+	vec1 := simd.LoadInt16x8Slice(v1)
+	vec2 := simd.LoadInt16x8Slice(v2)
+	switch which {
+	case "MaskedPairDotProd":
+		gotv = vec0.MaskedPairDotProd(vec1, vec2.AsMask16x8())
+
+	default:
+		t.Errorf("Unknown method: Int16x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt16x8BinaryWiden(t *testing.T, v0 []int16, v1 []int16, want []int32, which string) {
+	t.Helper()
+	var gotv simd.Int32x4
+	got := make([]int32, len(want))
+	vec0 := simd.LoadInt16x8Slice(v0)
+	vec1 := simd.LoadInt16x8Slice(v1)
+	switch which {
+	case "PairDotProd":
+		gotv = vec0.PairDotProd(vec1)
+
+	default:
+		t.Errorf("Unknown method: Int16x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt16x8Compare(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) {
+	t.Helper()
+	var gotv simd.Int16x8
+	got := make([]int16, len(want))
+	vec0 := simd.LoadInt16x8Slice(v0)
+	vec1 := simd.LoadInt16x8Slice(v1)
+	switch which {
+	case "Equal":
+		gotv = vec0.Equal(vec1).AsInt16x8()
+	case "Greater":
+		gotv = vec0.Greater(vec1).AsInt16x8()
+	case "GreaterEqual":
+		gotv = vec0.GreaterEqual(vec1).AsInt16x8()
+	case "Less":
+		gotv = vec0.Less(vec1).AsInt16x8()
+	case "LessEqual":
+		gotv = vec0.LessEqual(vec1).AsInt16x8()
+	case "NotEqual":
+		gotv = vec0.NotEqual(vec1).AsInt16x8()
+
+	default:
+		t.Errorf("Unknown method: Int16x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt16x8MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) {
+	t.Helper()
+	var gotv simd.Int16x8
+	got := make([]int16, len(want))
+	vec0 := simd.LoadInt16x8Slice(v0)
+	vec1 := simd.LoadInt16x8Slice(v1)
+	vec2 := simd.LoadInt16x8Slice(v2)
+	switch which {
+	case "MaskedEqual":
+		gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x8()).AsInt16x8()
+	case "MaskedGreater":
+		gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x8()).AsInt16x8()
+	case "MaskedGreaterEqual":
+		gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x8()).AsInt16x8()
+	case "MaskedLess":
+		gotv = vec0.MaskedLess(vec1, vec2.AsMask16x8()).AsInt16x8()
+	case "MaskedLessEqual":
+		gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x8()).AsInt16x8()
+	case "MaskedNotEqual":
+		gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x8()).AsInt16x8()
+
+	default:
+		t.Errorf("Unknown method: Int16x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt16x8Unary(t *testing.T, v0 []int16, want []int16, which string) {
+	t.Helper()
+	var gotv simd.Int16x8
+	got := make([]int16, len(want))
+	vec0 := simd.LoadInt16x8Slice(v0)
+	switch which {
+	case "Absolute":
+		gotv = vec0.Absolute()
+	case "PopCount":
+		gotv = vec0.PopCount()
+
+	default:
+		t.Errorf("Unknown method: Int16x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testInt16x8UnaryMasked(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) {
+	t.Helper()
+	var gotv simd.Int16x8
+	got := make([]int16, len(want))
+	vec0 := simd.LoadInt16x8Slice(v0)
+	vec1 := simd.LoadInt16x8Slice(v1)
+	switch which {
+	case "MaskedAbsolute":
+		gotv = vec0.MaskedAbsolute(vec1.AsMask16x8())
+	case "MaskedPopCount":
+		gotv = vec0.MaskedPopCount(vec1.AsMask16x8())
+
+	default:
+		t.Errorf("Unknown method: Int16x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
 func testInt16x16Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) {
 	t.Helper()
 	var gotv simd.Int16x16
@@ -2041,527 +2814,6 @@ func testInt16x32UnaryMasked(t *testing.T, v0 []int16, v1 []int16, want []int16,
 	}
 }
 
-func testInt16x8Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) {
-	t.Helper()
-	var gotv simd.Int16x8
-	got := make([]int16, len(want))
-	vec0 := simd.LoadInt16x8Slice(v0)
-	vec1 := simd.LoadInt16x8Slice(v1)
-	switch which {
-	case "Add":
-		gotv = vec0.Add(vec1)
-	case "And":
-		gotv = vec0.And(vec1)
-	case "AndNot":
-		gotv = vec0.AndNot(vec1)
-	case "Max":
-		gotv = vec0.Max(vec1)
-	case "Min":
-		gotv = vec0.Min(vec1)
-	case "MulHigh":
-		gotv = vec0.MulHigh(vec1)
-	case "MulLow":
-		gotv = vec0.MulLow(vec1)
-	case "Or":
-		gotv = vec0.Or(vec1)
-	case "PairwiseAdd":
-		gotv = vec0.PairwiseAdd(vec1)
-	case "PairwiseSub":
-		gotv = vec0.PairwiseSub(vec1)
-	case "SaturatedAdd":
-		gotv = vec0.SaturatedAdd(vec1)
-	case "SaturatedPairwiseAdd":
-		gotv = vec0.SaturatedPairwiseAdd(vec1)
-	case "SaturatedPairwiseSub":
-		gotv = vec0.SaturatedPairwiseSub(vec1)
-	case "SaturatedSub":
-		gotv = vec0.SaturatedSub(vec1)
-	case "Sign":
-		gotv = vec0.Sign(vec1)
-	case "Sub":
-		gotv = vec0.Sub(vec1)
-	case "Xor":
-		gotv = vec0.Xor(vec1)
-
-	default:
-		t.Errorf("Unknown method: Int16x8.%s", which)
-	}
-	gotv.StoreSlice(got)
-	for i := range len(want) {
-		if got[i] != want[i] {
-			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
-		}
-	}
-}
-
-func testInt16x8BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) {
-	t.Helper()
-	var gotv simd.Int16x8
-	got := make([]int16, len(want))
-	vec0 := simd.LoadInt16x8Slice(v0)
-	vec1 := simd.LoadInt16x8Slice(v1)
-	vec2 := simd.LoadInt16x8Slice(v2)
-	switch which {
-	case "MaskedAdd":
-		gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x8())
-	case "MaskedMax":
-		gotv = vec0.MaskedMax(vec1, vec2.AsMask16x8())
-	case "MaskedMin":
-		gotv = vec0.MaskedMin(vec1, vec2.AsMask16x8())
-	case "MaskedMulHigh":
-		gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x8())
-	case "MaskedMulLow":
-		gotv = vec0.MaskedMulLow(vec1, vec2.AsMask16x8())
-	case "MaskedSaturatedAdd":
-		gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x8())
-	case "MaskedSaturatedSub":
-		gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x8())
-	case "MaskedSub":
-		gotv = vec0.MaskedSub(vec1, vec2.AsMask16x8())
-
-	default:
-		t.Errorf("Unknown method: Int16x8.%s", which)
-	}
-	gotv.StoreSlice(got)
-	for i := range len(want) {
-		if got[i] != want[i] {
-			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
-		}
-	}
-}
-
-func testInt16x8BinaryMaskedWiden(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int32, which string) {
-	t.Helper()
-	var gotv simd.Int32x4
-	got := make([]int32, len(want))
-	vec0 := simd.LoadInt16x8Slice(v0)
-	vec1 := simd.LoadInt16x8Slice(v1)
-	vec2 := simd.LoadInt16x8Slice(v2)
-	switch which {
-	case "MaskedPairDotProd":
-		gotv = vec0.MaskedPairDotProd(vec1, vec2.AsMask16x8())
-
-	default:
-		t.Errorf("Unknown method: Int16x8.%s", which)
-	}
-	gotv.StoreSlice(got)
-	for i := range len(want) {
-		if got[i] != want[i] {
-			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
-		}
-	}
-}
-
-func testInt16x8BinaryWiden(t *testing.T, v0 []int16, v1 []int16, want []int32, which string) {
-	t.Helper()
-	var gotv simd.Int32x4
-	got := make([]int32, len(want))
-	vec0 := simd.LoadInt16x8Slice(v0)
-	vec1 := simd.LoadInt16x8Slice(v1)
-	switch which {
-	case "PairDotProd":
-		gotv = vec0.PairDotProd(vec1)
-
-	default:
-		t.Errorf("Unknown method: Int16x8.%s", which)
-	}
-	gotv.StoreSlice(got)
-	for i := range len(want) {
-		if got[i] != want[i] {
-			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
-		}
-	}
-}
-
-func testInt16x8Compare(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) {
-	t.Helper()
-	var gotv simd.Int16x8
-	got := make([]int16, len(want))
-	vec0 := simd.LoadInt16x8Slice(v0)
-	vec1 := simd.LoadInt16x8Slice(v1)
-	switch which {
-	case "Equal":
-		gotv = vec0.Equal(vec1).AsInt16x8()
-	case "Greater":
-		gotv = vec0.Greater(vec1).AsInt16x8()
-	case "GreaterEqual":
-		gotv = vec0.GreaterEqual(vec1).AsInt16x8()
-	case "Less":
-		gotv = vec0.Less(vec1).AsInt16x8()
-	case "LessEqual":
-		gotv = vec0.LessEqual(vec1).AsInt16x8()
-	case "NotEqual":
-		gotv = vec0.NotEqual(vec1).AsInt16x8()
-
-	default:
-		t.Errorf("Unknown method: Int16x8.%s", which)
-	}
-	gotv.StoreSlice(got)
-	for i := range len(want) {
-		if got[i] != want[i] {
-			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
-		}
-	}
-}
-
-func testInt16x8MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16,
which string) { - t.Helper() - var gotv simd.Int16x8 - got := make([]int16, len(want)) - vec0 := simd.LoadInt16x8Slice(v0) - vec1 := simd.LoadInt16x8Slice(v1) - vec2 := simd.LoadInt16x8Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - - default: - t.Errorf("Unknown method: Int16x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt16x8Unary(t *testing.T, v0 []int16, want []int16, which string) { - t.Helper() - var gotv simd.Int16x8 - got := make([]int16, len(want)) - vec0 := simd.LoadInt16x8Slice(v0) - switch which { - case "Absolute": - gotv = vec0.Absolute() - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Int16x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt16x8UnaryMasked(t *testing.T, v0 []int16, v1 []int16, want []int16, which string) { - t.Helper() - var gotv simd.Int16x8 - got := make([]int16, len(want)) - vec0 := simd.LoadInt16x8Slice(v0) - vec1 := simd.LoadInt16x8Slice(v1) - switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask16x8()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask16x8()) - - default: - t.Errorf("Unknown method: Int16x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt32x16Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadInt32x16Slice(v0) - vec1 := simd.LoadInt32x16Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "MulLow": - gotv = vec0.MulLow(vec1) - case "Or": - gotv = vec0.Or(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Int32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt32x16BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadInt32x16Slice(v0) - vec1 := simd.LoadInt32x16Slice(v1) - vec2 := simd.LoadInt32x16Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x16()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x16()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, 
vec2.AsMask32x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16()) - case "MaskedMulLow": - gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x16()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask32x16()) - - default: - t.Errorf("Unknown method: Int32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt32x16Compare(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadInt32x16Slice(v0) - vec1 := simd.LoadInt32x16Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt32x16() - case "Greater": - gotv = vec0.Greater(vec1).AsInt32x16() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt32x16() - case "Less": - gotv = vec0.Less(vec1).AsInt32x16() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt32x16() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt32x16() - - default: - t.Errorf("Unknown method: Int32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt32x16Int16x32Int16x32Int32x16(t *testing.T, v0 []int32, v1 []int16, v2 []int16, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadInt32x16Slice(v0) - vec1 := simd.LoadInt16x32Slice(v1) - vec2 := simd.LoadInt16x32Slice(v2) - switch which { - case "PairDotProdAccumulate": - gotv = vec0.PairDotProdAccumulate(vec1, vec2) - case "SaturatedPairDotProdAccumulate": - gotv = vec0.SaturatedPairDotProdAccumulate(vec1, vec2) - - default: - t.Errorf("Unknown method: Int32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt32x16Int16x32Int16x32Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []int16, v2 []int16, v3 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadInt32x16Slice(v0) - vec1 := simd.LoadInt16x32Slice(v1) - vec2 := simd.LoadInt16x32Slice(v2) - vec3 := simd.LoadInt32x16Slice(v3) - switch which { - case "MaskedPairDotProdAccumulate": - gotv = vec0.MaskedPairDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) - case "MaskedSaturatedPairDotProdAccumulate": - gotv = vec0.MaskedSaturatedPairDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) - - default: - t.Errorf("Unknown method: Int32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt32x16MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadInt32x16Slice(v0) - vec1 := simd.LoadInt32x16Slice(v1) - vec2 := simd.LoadInt32x16Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreaterEqual": 
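// The masked compares mirror the unmasked Compare helper above: the
// predicate is evaluated only in lanes whose mask bit is set, and
// AsInt32x16 then materializes the result as -1 (all bits set) for
// true lanes and 0 everywhere else, so the test can check it as a
// plain integer vector. A per-lane sketch, assuming zeroing-mask
// semantics for the unselected lanes:
//
//	want[i] = 0
//	if mask[i] != 0 && v0[i] >= v1[i] {
//		want[i] = -1
//	}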
- gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - - default: - t.Errorf("Unknown method: Int32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt32x16Uint8x64Int8x64Int32x16(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadInt32x16Slice(v0) - vec1 := simd.LoadUint8x64Slice(v1) - vec2 := simd.LoadInt8x64Slice(v2) - switch which { - case "SaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2) - case "UnsignedSignedQuadDotProdAccumulate": - gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2) - - default: - t.Errorf("Unknown method: Int32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt32x16Uint8x64Int8x64Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, v3 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadInt32x16Slice(v0) - vec1 := simd.LoadUint8x64Slice(v1) - vec2 := simd.LoadInt8x64Slice(v2) - vec3 := simd.LoadInt32x16Slice(v3) - switch which { - case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) - case "MaskedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) - - default: - t.Errorf("Unknown method: Int32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt32x16Unary(t *testing.T, v0 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadInt32x16Slice(v0) - switch which { - case "Absolute": - gotv = vec0.Absolute() - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Int32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt32x16UnaryMasked(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadInt32x16Slice(v0) - vec1 := simd.LoadInt32x16Slice(v1) - switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask32x16()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask32x16()) - - default: - t.Errorf("Unknown method: Int32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - func testInt32x4Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { t.Helper() var gotv simd.Int32x4 @@ 
-3178,6 +3430,287 @@ func testInt32x8UnaryMasked(t *testing.T, v0 []int32, v1 []int32, want []int32, } } +func testInt32x16Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + switch which { + case "Add": + gotv = vec0.Add(vec1) + case "And": + gotv = vec0.And(vec1) + case "AndNot": + gotv = vec0.AndNot(vec1) + case "Max": + gotv = vec0.Max(vec1) + case "Min": + gotv = vec0.Min(vec1) + case "MulLow": + gotv = vec0.MulLow(vec1) + case "Or": + gotv = vec0.Or(vec1) + case "Sub": + gotv = vec0.Sub(vec1) + case "Xor": + gotv = vec0.Xor(vec1) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + vec2 := simd.LoadInt32x16Slice(v2) + switch which { + case "MaskedAdd": + gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x16()) + case "MaskedAnd": + gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x16()) + case "MaskedAndNot": + gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x16()) + case "MaskedMax": + gotv = vec0.MaskedMax(vec1, vec2.AsMask32x16()) + case "MaskedMin": + gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16()) + case "MaskedMulLow": + gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x16()) + case "MaskedOr": + gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16()) + case "MaskedSub": + gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16()) + case "MaskedXor": + gotv = vec0.MaskedXor(vec1, vec2.AsMask32x16()) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16Compare(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + switch which { + case "Equal": + gotv = vec0.Equal(vec1).AsInt32x16() + case "Greater": + gotv = vec0.Greater(vec1).AsInt32x16() + case "GreaterEqual": + gotv = vec0.GreaterEqual(vec1).AsInt32x16() + case "Less": + gotv = vec0.Less(vec1).AsInt32x16() + case "LessEqual": + gotv = vec0.LessEqual(vec1).AsInt32x16() + case "NotEqual": + gotv = vec0.NotEqual(vec1).AsInt32x16() + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16Int16x32Int16x32Int32x16(t *testing.T, v0 []int32, v1 []int16, v2 []int16, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt16x32Slice(v1) + vec2 := simd.LoadInt16x32Slice(v2) + switch which { + case "PairDotProdAccumulate": + gotv = vec0.PairDotProdAccumulate(vec1, vec2) + case "SaturatedPairDotProdAccumulate": + gotv = vec0.SaturatedPairDotProdAccumulate(vec1, vec2) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + 
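// PairDotProdAccumulate multiplies adjacent int16 pairs, sums each
// pair's two products, and adds the sum into the corresponding int32
// accumulator lane (VPDPWSSD-style); the Saturated variant clamps that
// final add instead of wrapping, and the QuadDotProd helpers further
// down do the analogous four-way uint8-by-int8 dot product. A scalar
// sketch of the unsaturated pair case, assuming that lane layout
// (hypothetical reference helper, not part of the generated file):
//
//	func pairDotProdAccumulateRef(acc []int32, a, b []int16) []int32 {
//		out := make([]int32, len(acc))
//		for i := range out {
//			out[i] = acc[i] +
//				int32(a[2*i])*int32(b[2*i]) +
//				int32(a[2*i+1])*int32(b[2*i+1])
//		}
//		return out
//	}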
gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16Int16x32Int16x32Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []int16, v2 []int16, v3 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt16x32Slice(v1) + vec2 := simd.LoadInt16x32Slice(v2) + vec3 := simd.LoadInt32x16Slice(v3) + switch which { + case "MaskedPairDotProdAccumulate": + gotv = vec0.MaskedPairDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) + case "MaskedSaturatedPairDotProdAccumulate": + gotv = vec0.MaskedSaturatedPairDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + vec2 := simd.LoadInt32x16Slice(v2) + switch which { + case "MaskedEqual": + gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedGreater": + gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedGreaterEqual": + gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedLess": + gotv = vec0.MaskedLess(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedLessEqual": + gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + case "MaskedNotEqual": + gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16Uint8x64Int8x64Int32x16(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadUint8x64Slice(v1) + vec2 := simd.LoadInt8x64Slice(v2) + switch which { + case "SaturatedUnsignedSignedQuadDotProdAccumulate": + gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2) + case "UnsignedSignedQuadDotProdAccumulate": + gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16Uint8x64Int8x64Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, v3 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadUint8x64Slice(v1) + vec2 := simd.LoadInt8x64Slice(v2) + vec3 := simd.LoadInt32x16Slice(v3) + switch which { + case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": + gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) + case "MaskedUnsignedSignedQuadDotProdAccumulate": + gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, 
vec3.AsMask32x16()) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16Unary(t *testing.T, v0 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + switch which { + case "Absolute": + gotv = vec0.Absolute() + case "PopCount": + gotv = vec0.PopCount() + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16UnaryMasked(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + switch which { + case "MaskedAbsolute": + gotv = vec0.MaskedAbsolute(vec1.AsMask32x16()) + case "MaskedPopCount": + gotv = vec0.MaskedPopCount(vec1.AsMask32x16()) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt64x2Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x2 @@ -3739,2347 +4272,6 @@ func testInt64x8UnaryMasked(t *testing.T, v0 []int64, v1 []int64, want []int64, } } -func testInt8x16Binary(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x16 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x16Slice(v0) - vec1 := simd.LoadInt8x16Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "Or": - gotv = vec0.Or(vec1) - case "SaturatedAdd": - gotv = vec0.SaturatedAdd(vec1) - case "SaturatedSub": - gotv = vec0.SaturatedSub(vec1) - case "Sign": - gotv = vec0.Sign(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Int8x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x16BinaryMasked(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x16 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x16Slice(v0) - vec1 := simd.LoadInt8x16Slice(v1) - vec2 := simd.LoadInt8x16Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask8x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask8x16()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x16()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask8x16()) - - default: - t.Errorf("Unknown method: Int8x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } 
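// The Saturated cases above clamp rather than wrap: for int8 lanes,
// SaturatedAdd pins the result to [-128, 127] (e.g. 120 + 100 gives
// 127, not the wrapped -36), and SaturatedSub clamps the same way on
// underflow. The plain Add/Sub cases cover the wrapping behaviour.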
- } -} - -func testInt8x16Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x16 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x16Slice(v0) - vec1 := simd.LoadInt8x16Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt8x16() - case "Greater": - gotv = vec0.Greater(vec1).AsInt8x16() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt8x16() - case "Less": - gotv = vec0.Less(vec1).AsInt8x16() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt8x16() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt8x16() - - default: - t.Errorf("Unknown method: Int8x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x16MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x16 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x16Slice(v0) - vec1 := simd.LoadInt8x16Slice(v1) - vec2 := simd.LoadInt8x16Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x16()).AsInt8x16() - - default: - t.Errorf("Unknown method: Int8x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x16Unary(t *testing.T, v0 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x16 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x16Slice(v0) - switch which { - case "Absolute": - gotv = vec0.Absolute() - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Int8x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x16UnaryMasked(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x16 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x16Slice(v0) - vec1 := simd.LoadInt8x16Slice(v1) - switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask8x16()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask8x16()) - - default: - t.Errorf("Unknown method: Int8x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x32Binary(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x32 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x32Slice(v0) - vec1 := simd.LoadInt8x32Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "Or": - gotv = vec0.Or(vec1) - case 
"SaturatedAdd": - gotv = vec0.SaturatedAdd(vec1) - case "SaturatedSub": - gotv = vec0.SaturatedSub(vec1) - case "Sign": - gotv = vec0.Sign(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Int8x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x32BinaryMasked(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x32 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x32Slice(v0) - vec1 := simd.LoadInt8x32Slice(v1) - vec2 := simd.LoadInt8x32Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x32()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask8x32()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask8x32()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x32()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x32()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask8x32()) - - default: - t.Errorf("Unknown method: Int8x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x32Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x32 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x32Slice(v0) - vec1 := simd.LoadInt8x32Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt8x32() - case "Greater": - gotv = vec0.Greater(vec1).AsInt8x32() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt8x32() - case "Less": - gotv = vec0.Less(vec1).AsInt8x32() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt8x32() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt8x32() - - default: - t.Errorf("Unknown method: Int8x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x32MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x32 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x32Slice(v0) - vec1 := simd.LoadInt8x32Slice(v1) - vec2 := simd.LoadInt8x32Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x32()).AsInt8x32() - - default: - t.Errorf("Unknown method: Int8x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x32Unary(t *testing.T, v0 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x32 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x32Slice(v0) - switch which { - case "Absolute": - 
gotv = vec0.Absolute() - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Int8x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x32UnaryMasked(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x32 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x32Slice(v0) - vec1 := simd.LoadInt8x32Slice(v1) - switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask8x32()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask8x32()) - - default: - t.Errorf("Unknown method: Int8x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x64Binary(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x64 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x64Slice(v0) - vec1 := simd.LoadInt8x64Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "SaturatedAdd": - gotv = vec0.SaturatedAdd(vec1) - case "SaturatedSub": - gotv = vec0.SaturatedSub(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - - default: - t.Errorf("Unknown method: Int8x64.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x64BinaryMasked(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x64 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x64Slice(v0) - vec1 := simd.LoadInt8x64Slice(v1) - vec2 := simd.LoadInt8x64Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x64()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask8x64()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask8x64()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x64()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x64()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask8x64()) - - default: - t.Errorf("Unknown method: Int8x64.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x64Compare(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x64 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x64Slice(v0) - vec1 := simd.LoadInt8x64Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt8x64() - case "Greater": - gotv = vec0.Greater(vec1).AsInt8x64() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt8x64() - case "Less": - gotv = vec0.Less(vec1).AsInt8x64() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt8x64() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt8x64() - - default: - t.Errorf("Unknown method: Int8x64.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x64MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, 
want []int8, which string) { - t.Helper() - var gotv simd.Int8x64 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x64Slice(v0) - vec1 := simd.LoadInt8x64Slice(v1) - vec2 := simd.LoadInt8x64Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x64()).AsInt8x64() - - default: - t.Errorf("Unknown method: Int8x64.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x64Unary(t *testing.T, v0 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x64 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x64Slice(v0) - switch which { - case "Absolute": - gotv = vec0.Absolute() - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Int8x64.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testInt8x64UnaryMasked(t *testing.T, v0 []int8, v1 []int8, want []int8, which string) { - t.Helper() - var gotv simd.Int8x64 - got := make([]int8, len(want)) - vec0 := simd.LoadInt8x64Slice(v0) - vec1 := simd.LoadInt8x64Slice(v1) - switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask8x64()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask8x64()) - - default: - t.Errorf("Unknown method: Int8x64.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x16Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x16 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x16Slice(v0) - vec1 := simd.LoadUint16x16Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Average": - gotv = vec0.Average(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "MulHigh": - gotv = vec0.MulHigh(vec1) - case "Or": - gotv = vec0.Or(vec1) - case "PairwiseAdd": - gotv = vec0.PairwiseAdd(vec1) - case "PairwiseSub": - gotv = vec0.PairwiseSub(vec1) - case "SaturatedAdd": - gotv = vec0.SaturatedAdd(vec1) - case "SaturatedSub": - gotv = vec0.SaturatedSub(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Uint16x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x16BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x16 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x16Slice(v0) - vec1 := simd.LoadUint16x16Slice(v1) - vec2 := simd.LoadInt16x16Slice(v2) 
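// This is the mask convention used throughout these helpers: the
// extra operand is loaded as a plain signed-integer vector and
// reinterpreted with AsMask16x16, so the test data presumably uses
// -1 (all bits set) for selected lanes and 0 for unselected ones;
// other bit patterns are not exercised by the generated cases.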
- switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x16()) - case "MaskedAverage": - gotv = vec0.MaskedAverage(vec1, vec2.AsMask16x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask16x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask16x16()) - case "MaskedMulHigh": - gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x16()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x16()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask16x16()) - - default: - t.Errorf("Unknown method: Uint16x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x16Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16, which string) { - t.Helper() - var gotv simd.Int16x16 - got := make([]int16, len(want)) - vec0 := simd.LoadUint16x16Slice(v0) - vec1 := simd.LoadUint16x16Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt16x16() - case "Greater": - gotv = vec0.Greater(vec1).AsInt16x16() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt16x16() - case "Less": - gotv = vec0.Less(vec1).AsInt16x16() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt16x16() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt16x16() - - default: - t.Errorf("Unknown method: Uint16x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x16MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) { - t.Helper() - var gotv simd.Int16x16 - got := make([]int16, len(want)) - vec0 := simd.LoadUint16x16Slice(v0) - vec1 := simd.LoadUint16x16Slice(v1) - vec2 := simd.LoadInt16x16Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x16()).AsInt16x16() - - default: - t.Errorf("Unknown method: Uint16x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x16Unary(t *testing.T, v0 []uint16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x16 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x16Slice(v0) - switch which { - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Uint16x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x16UnaryMasked(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x16 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x16Slice(v0) - vec1 := simd.LoadInt16x16Slice(v1) - 
switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask16x16()) - - default: - t.Errorf("Unknown method: Uint16x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x32Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x32 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x32Slice(v0) - vec1 := simd.LoadUint16x32Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "Average": - gotv = vec0.Average(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "MulHigh": - gotv = vec0.MulHigh(vec1) - case "SaturatedAdd": - gotv = vec0.SaturatedAdd(vec1) - case "SaturatedSub": - gotv = vec0.SaturatedSub(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - - default: - t.Errorf("Unknown method: Uint16x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x32BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x32 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x32Slice(v0) - vec1 := simd.LoadUint16x32Slice(v1) - vec2 := simd.LoadInt16x32Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x32()) - case "MaskedAverage": - gotv = vec0.MaskedAverage(vec1, vec2.AsMask16x32()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask16x32()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask16x32()) - case "MaskedMulHigh": - gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x32()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x32()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x32()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask16x32()) - - default: - t.Errorf("Unknown method: Uint16x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x32Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16, which string) { - t.Helper() - var gotv simd.Int16x32 - got := make([]int16, len(want)) - vec0 := simd.LoadUint16x32Slice(v0) - vec1 := simd.LoadUint16x32Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt16x32() - case "Greater": - gotv = vec0.Greater(vec1).AsInt16x32() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt16x32() - case "Less": - gotv = vec0.Less(vec1).AsInt16x32() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt16x32() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt16x32() - - default: - t.Errorf("Unknown method: Uint16x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x32MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) { - t.Helper() - var gotv simd.Int16x32 - got := make([]int16, len(want)) - vec0 := simd.LoadUint16x32Slice(v0) - vec1 := simd.LoadUint16x32Slice(v1) - vec2 := simd.LoadInt16x32Slice(v2) - switch which { - case "MaskedEqual": - gotv = 
vec0.MaskedEqual(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x32()).AsInt16x32() - - default: - t.Errorf("Unknown method: Uint16x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x32Unary(t *testing.T, v0 []uint16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x32 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x32Slice(v0) - switch which { - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Uint16x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x32UnaryMasked(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x32 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x32Slice(v0) - vec1 := simd.LoadInt16x32Slice(v1) - switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask16x32()) - - default: - t.Errorf("Unknown method: Uint16x32.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x8Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x8 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x8Slice(v0) - vec1 := simd.LoadUint16x8Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Average": - gotv = vec0.Average(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "MulHigh": - gotv = vec0.MulHigh(vec1) - case "Or": - gotv = vec0.Or(vec1) - case "PairwiseAdd": - gotv = vec0.PairwiseAdd(vec1) - case "PairwiseSub": - gotv = vec0.PairwiseSub(vec1) - case "SaturatedAdd": - gotv = vec0.SaturatedAdd(vec1) - case "SaturatedSub": - gotv = vec0.SaturatedSub(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Uint16x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x8BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x8 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x8Slice(v0) - vec1 := simd.LoadUint16x8Slice(v1) - vec2 := simd.LoadInt16x8Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x8()) - case "MaskedAverage": - gotv = vec0.MaskedAverage(vec1, vec2.AsMask16x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask16x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask16x8()) - case "MaskedMulHigh": - gotv = 
vec0.MaskedMulHigh(vec1, vec2.AsMask16x8()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x8()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask16x8()) - - default: - t.Errorf("Unknown method: Uint16x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x8Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16, which string) { - t.Helper() - var gotv simd.Int16x8 - got := make([]int16, len(want)) - vec0 := simd.LoadUint16x8Slice(v0) - vec1 := simd.LoadUint16x8Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt16x8() - case "Greater": - gotv = vec0.Greater(vec1).AsInt16x8() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt16x8() - case "Less": - gotv = vec0.Less(vec1).AsInt16x8() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt16x8() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt16x8() - - default: - t.Errorf("Unknown method: Uint16x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x8MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) { - t.Helper() - var gotv simd.Int16x8 - got := make([]int16, len(want)) - vec0 := simd.LoadUint16x8Slice(v0) - vec1 := simd.LoadUint16x8Slice(v1) - vec2 := simd.LoadInt16x8Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - - default: - t.Errorf("Unknown method: Uint16x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x8Unary(t *testing.T, v0 []uint16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x8 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x8Slice(v0) - switch which { - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Uint16x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint16x8UnaryMasked(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) { - t.Helper() - var gotv simd.Uint16x8 - got := make([]uint16, len(want)) - vec0 := simd.LoadUint16x8Slice(v0) - vec1 := simd.LoadInt16x8Slice(v1) - switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask16x8()) - - default: - t.Errorf("Unknown method: Uint16x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x16Binary(t *testing.T, v0 []uint32, v1 
[]uint32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x16 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x16Slice(v0) - vec1 := simd.LoadUint32x16Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "Or": - gotv = vec0.Or(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Uint32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x16BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x16 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x16Slice(v0) - vec1 := simd.LoadUint32x16Slice(v1) - vec2 := simd.LoadInt32x16Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x16()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x16()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask32x16()) - - default: - t.Errorf("Unknown method: Uint32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x16Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadUint32x16Slice(v0) - vec1 := simd.LoadUint32x16Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt32x16() - case "Greater": - gotv = vec0.Greater(vec1).AsInt32x16() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt32x16() - case "Less": - gotv = vec0.Less(vec1).AsInt32x16() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt32x16() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt32x16() - - default: - t.Errorf("Unknown method: Uint32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x16MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x16 - got := make([]int32, len(want)) - vec0 := simd.LoadUint32x16Slice(v0) - vec1 := simd.LoadUint32x16Slice(v1) - vec2 := simd.LoadInt32x16Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, 
vec2.AsMask32x16()).AsInt32x16() - - default: - t.Errorf("Unknown method: Uint32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x16Uint8x64Int8x64Mask32x16Uint32x16(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x16 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x16Slice(v0) - vec1 := simd.LoadUint8x64Slice(v1) - vec2 := simd.LoadInt8x64Slice(v2) - vec3 := simd.LoadInt32x16Slice(v3) - switch which { - case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) - case "MaskedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) - - default: - t.Errorf("Unknown method: Uint32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x16Uint8x64Int8x64Uint32x16(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x16 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x16Slice(v0) - vec1 := simd.LoadUint8x64Slice(v1) - vec2 := simd.LoadInt8x64Slice(v2) - switch which { - case "SaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2) - case "UnsignedSignedQuadDotProdAccumulate": - gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2) - - default: - t.Errorf("Unknown method: Uint32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x16Unary(t *testing.T, v0 []uint32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x16 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x16Slice(v0) - switch which { - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Uint32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x16UnaryMasked(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x16 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x16Slice(v0) - vec1 := simd.LoadInt32x16Slice(v1) - switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask32x16()) - - default: - t.Errorf("Unknown method: Uint32x16.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x4Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x4 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x4Slice(v0) - vec1 := simd.LoadUint32x4Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "Or": - gotv = vec0.Or(vec1) - case "PairwiseAdd": - gotv = 
vec0.PairwiseAdd(vec1) - case "PairwiseSub": - gotv = vec0.PairwiseSub(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Uint32x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x4BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x4 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x4Slice(v0) - vec1 := simd.LoadUint32x4Slice(v1) - vec2 := simd.LoadInt32x4Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x4()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x4()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x4()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x4()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x4()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask32x4()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x4()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask32x4()) - - default: - t.Errorf("Unknown method: Uint32x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x4BinaryWiden(t *testing.T, v0 []uint32, v1 []uint32, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x2 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint32x4Slice(v0) - vec1 := simd.LoadUint32x4Slice(v1) - switch which { - case "MulEvenWiden": - gotv = vec0.MulEvenWiden(vec1) - - default: - t.Errorf("Unknown method: Uint32x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x4Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x4 - got := make([]int32, len(want)) - vec0 := simd.LoadUint32x4Slice(v0) - vec1 := simd.LoadUint32x4Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt32x4() - case "Greater": - gotv = vec0.Greater(vec1).AsInt32x4() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt32x4() - case "Less": - gotv = vec0.Less(vec1).AsInt32x4() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt32x4() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt32x4() - - default: - t.Errorf("Unknown method: Uint32x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x4MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x4 - got := make([]int32, len(want)) - vec0 := simd.LoadUint32x4Slice(v0) - vec1 := simd.LoadUint32x4Slice(v1) - vec2 := simd.LoadInt32x4Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x4()).AsInt32x4() - case 
"MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - - default: - t.Errorf("Unknown method: Uint32x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x4Uint8x16Int8x16Mask32x4Uint32x4(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x4 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x4Slice(v0) - vec1 := simd.LoadUint8x16Slice(v1) - vec2 := simd.LoadInt8x16Slice(v2) - vec3 := simd.LoadInt32x4Slice(v3) - switch which { - case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x4()) - case "MaskedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x4()) - - default: - t.Errorf("Unknown method: Uint32x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x4Uint8x16Int8x16Uint32x4(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x4 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x4Slice(v0) - vec1 := simd.LoadUint8x16Slice(v1) - vec2 := simd.LoadInt8x16Slice(v2) - switch which { - case "SaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2) - case "UnsignedSignedQuadDotProdAccumulate": - gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2) - - default: - t.Errorf("Unknown method: Uint32x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x4Unary(t *testing.T, v0 []uint32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x4 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x4Slice(v0) - switch which { - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Uint32x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x4UnaryMasked(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x4 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x4Slice(v0) - vec1 := simd.LoadInt32x4Slice(v1) - switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask32x4()) - - default: - t.Errorf("Unknown method: Uint32x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x8Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x8 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x8Slice(v0) - vec1 := simd.LoadUint32x8Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Max": - gotv = 
vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "Or": - gotv = vec0.Or(vec1) - case "PairwiseAdd": - gotv = vec0.PairwiseAdd(vec1) - case "PairwiseSub": - gotv = vec0.PairwiseSub(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Uint32x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x8BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x8 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x8Slice(v0) - vec1 := simd.LoadUint32x8Slice(v1) - vec2 := simd.LoadInt32x8Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x8()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x8()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x8()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask32x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x8()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask32x8()) - - default: - t.Errorf("Unknown method: Uint32x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x8BinaryWiden(t *testing.T, v0 []uint32, v1 []uint32, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x4 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint32x8Slice(v0) - vec1 := simd.LoadUint32x8Slice(v1) - switch which { - case "MulEvenWiden": - gotv = vec0.MulEvenWiden(vec1) - - default: - t.Errorf("Unknown method: Uint32x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x8Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x8 - got := make([]int32, len(want)) - vec0 := simd.LoadUint32x8Slice(v0) - vec1 := simd.LoadUint32x8Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt32x8() - case "Greater": - gotv = vec0.Greater(vec1).AsInt32x8() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt32x8() - case "Less": - gotv = vec0.Less(vec1).AsInt32x8() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt32x8() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt32x8() - - default: - t.Errorf("Unknown method: Uint32x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x8MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) { - t.Helper() - var gotv simd.Int32x8 - got := make([]int32, len(want)) - vec0 := simd.LoadUint32x8Slice(v0) - vec1 := simd.LoadUint32x8Slice(v1) - vec2 := simd.LoadInt32x8Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, 
vec2.AsMask32x8()).AsInt32x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - - default: - t.Errorf("Unknown method: Uint32x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x8Uint8x32Int8x32Mask32x8Uint32x8(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x8 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x8Slice(v0) - vec1 := simd.LoadUint8x32Slice(v1) - vec2 := simd.LoadInt8x32Slice(v2) - vec3 := simd.LoadInt32x8Slice(v3) - switch which { - case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x8()) - case "MaskedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x8()) - - default: - t.Errorf("Unknown method: Uint32x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x8Uint8x32Int8x32Uint32x8(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x8 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x8Slice(v0) - vec1 := simd.LoadUint8x32Slice(v1) - vec2 := simd.LoadInt8x32Slice(v2) - switch which { - case "SaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2) - case "UnsignedSignedQuadDotProdAccumulate": - gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2) - - default: - t.Errorf("Unknown method: Uint32x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x8Unary(t *testing.T, v0 []uint32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x8 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x8Slice(v0) - switch which { - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Uint32x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint32x8UnaryMasked(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) { - t.Helper() - var gotv simd.Uint32x8 - got := make([]uint32, len(want)) - vec0 := simd.LoadUint32x8Slice(v0) - vec1 := simd.LoadInt32x8Slice(v1) - switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask32x8()) - - default: - t.Errorf("Unknown method: Uint32x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x2Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x2 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x2Slice(v0) - vec1 := simd.LoadUint64x2Slice(v1) - switch which { - case "Add": - gotv = 
vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "MulEvenWiden": - gotv = vec0.MulEvenWiden(vec1) - case "Or": - gotv = vec0.Or(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Uint64x2.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x2BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x2 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x2Slice(v0) - vec1 := simd.LoadUint64x2Slice(v1) - vec2 := simd.LoadInt64x2Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x2()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x2()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x2()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x2()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x2()) - case "MaskedMulEvenWiden": - gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x2()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask64x2()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x2()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask64x2()) - - default: - t.Errorf("Unknown method: Uint64x2.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x2Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, which string) { - t.Helper() - var gotv simd.Int64x2 - got := make([]int64, len(want)) - vec0 := simd.LoadUint64x2Slice(v0) - vec1 := simd.LoadUint64x2Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt64x2() - case "Greater": - gotv = vec0.Greater(vec1).AsInt64x2() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt64x2() - case "Less": - gotv = vec0.Less(vec1).AsInt64x2() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt64x2() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt64x2() - - default: - t.Errorf("Unknown method: Uint64x2.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x2MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) { - t.Helper() - var gotv simd.Int64x2 - got := make([]int64, len(want)) - vec0 := simd.LoadUint64x2Slice(v0) - vec1 := simd.LoadUint64x2Slice(v1) - vec2 := simd.LoadInt64x2Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - - default: - t.Errorf("Unknown method: Uint64x2.%s", which) - } - gotv.StoreSlice(got) - for i := 
range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x2Unary(t *testing.T, v0 []uint64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x2 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x2Slice(v0) - switch which { - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Uint64x2.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x2UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x2 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x2Slice(v0) - vec1 := simd.LoadInt64x2Slice(v1) - switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask64x2()) - - default: - t.Errorf("Unknown method: Uint64x2.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x4Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x4 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x4Slice(v0) - vec1 := simd.LoadUint64x4Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "MulEvenWiden": - gotv = vec0.MulEvenWiden(vec1) - case "Or": - gotv = vec0.Or(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Uint64x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x4BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x4 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x4Slice(v0) - vec1 := simd.LoadUint64x4Slice(v1) - vec2 := simd.LoadInt64x4Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x4()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x4()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x4()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x4()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x4()) - case "MaskedMulEvenWiden": - gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x4()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask64x4()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x4()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask64x4()) - - default: - t.Errorf("Unknown method: Uint64x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x4Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, which string) { - t.Helper() - var gotv simd.Int64x4 - got := make([]int64, len(want)) - vec0 := simd.LoadUint64x4Slice(v0) - vec1 := simd.LoadUint64x4Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt64x4() - case "Greater": - 
gotv = vec0.Greater(vec1).AsInt64x4() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt64x4() - case "Less": - gotv = vec0.Less(vec1).AsInt64x4() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt64x4() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt64x4() - - default: - t.Errorf("Unknown method: Uint64x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x4MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) { - t.Helper() - var gotv simd.Int64x4 - got := make([]int64, len(want)) - vec0 := simd.LoadUint64x4Slice(v0) - vec1 := simd.LoadUint64x4Slice(v1) - vec2 := simd.LoadInt64x4Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - - default: - t.Errorf("Unknown method: Uint64x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x4Unary(t *testing.T, v0 []uint64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x4 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x4Slice(v0) - switch which { - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Uint64x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x4UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x4 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x4Slice(v0) - vec1 := simd.LoadInt64x4Slice(v1) - switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask64x4()) - - default: - t.Errorf("Unknown method: Uint64x4.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x8Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x8 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x8Slice(v0) - vec1 := simd.LoadUint64x8Slice(v1) - switch which { - case "Add": - gotv = vec0.Add(vec1) - case "And": - gotv = vec0.And(vec1) - case "AndNot": - gotv = vec0.AndNot(vec1) - case "Max": - gotv = vec0.Max(vec1) - case "Min": - gotv = vec0.Min(vec1) - case "MulEvenWiden": - gotv = vec0.MulEvenWiden(vec1) - case "Or": - gotv = vec0.Or(vec1) - case "Sub": - gotv = vec0.Sub(vec1) - case "Xor": - gotv = vec0.Xor(vec1) - - default: - t.Errorf("Unknown method: Uint64x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x8BinaryMasked(t *testing.T, v0 []uint64, 
v1 []uint64, v2 []int64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x8 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x8Slice(v0) - vec1 := simd.LoadUint64x8Slice(v1) - vec2 := simd.LoadInt64x8Slice(v2) - switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x8()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x8()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x8()) - case "MaskedMulEvenWiden": - gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x8()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask64x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x8()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask64x8()) - - default: - t.Errorf("Unknown method: Uint64x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x8Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, which string) { - t.Helper() - var gotv simd.Int64x8 - got := make([]int64, len(want)) - vec0 := simd.LoadUint64x8Slice(v0) - vec1 := simd.LoadUint64x8Slice(v1) - switch which { - case "Equal": - gotv = vec0.Equal(vec1).AsInt64x8() - case "Greater": - gotv = vec0.Greater(vec1).AsInt64x8() - case "GreaterEqual": - gotv = vec0.GreaterEqual(vec1).AsInt64x8() - case "Less": - gotv = vec0.Less(vec1).AsInt64x8() - case "LessEqual": - gotv = vec0.LessEqual(vec1).AsInt64x8() - case "NotEqual": - gotv = vec0.NotEqual(vec1).AsInt64x8() - - default: - t.Errorf("Unknown method: Uint64x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x8MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) { - t.Helper() - var gotv simd.Int64x8 - got := make([]int64, len(want)) - vec0 := simd.LoadUint64x8Slice(v0) - vec1 := simd.LoadUint64x8Slice(v1) - vec2 := simd.LoadInt64x8Slice(v2) - switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - - default: - t.Errorf("Unknown method: Uint64x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func testUint64x8Unary(t *testing.T, v0 []uint64, want []uint64, which string) { - t.Helper() - var gotv simd.Uint64x8 - got := make([]uint64, len(want)) - vec0 := simd.LoadUint64x8Slice(v0) - switch which { - case "PopCount": - gotv = vec0.PopCount() - - default: - t.Errorf("Unknown method: Uint64x8.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} - -func 
testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) {
-	t.Helper()
-	var gotv simd.Uint64x8
-	got := make([]uint64, len(want))
-	vec0 := simd.LoadUint64x8Slice(v0)
-	vec1 := simd.LoadInt64x8Slice(v1)
-	switch which {
-	case "MaskedPopCount":
-		gotv = vec0.MaskedPopCount(vec1.AsMask64x8())
-
-	default:
-		t.Errorf("Unknown method: Uint64x8.%s", which)
-	}
-	gotv.StoreSlice(got)
-	for i := range len(want) {
-		if got[i] != want[i] {
-			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
-		}
-	}
-}
-
 func testUint8x16Binary(t *testing.T, v0 []uint8, v1 []uint8, want []uint8, which string) {
 	t.Helper()
 	var gotv simd.Uint8x16
@@ -6737,3 +4929,1811 @@ func testUint8x64UnaryMasked(t *testing.T, v0 []uint8, v1 []int8, want []uint8,
 		}
 	}
 }
+
+func testUint16x8Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, which string) {
+	t.Helper()
+	var gotv simd.Uint16x8
+	got := make([]uint16, len(want))
+	vec0 := simd.LoadUint16x8Slice(v0)
+	vec1 := simd.LoadUint16x8Slice(v1)
+	switch which {
+	case "Add":
+		gotv = vec0.Add(vec1)
+	case "And":
+		gotv = vec0.And(vec1)
+	case "AndNot":
+		gotv = vec0.AndNot(vec1)
+	case "Average":
+		gotv = vec0.Average(vec1)
+	case "Max":
+		gotv = vec0.Max(vec1)
+	case "Min":
+		gotv = vec0.Min(vec1)
+	case "MulHigh":
+		gotv = vec0.MulHigh(vec1)
+	case "Or":
+		gotv = vec0.Or(vec1)
+	case "PairwiseAdd":
+		gotv = vec0.PairwiseAdd(vec1)
+	case "PairwiseSub":
+		gotv = vec0.PairwiseSub(vec1)
+	case "SaturatedAdd":
+		gotv = vec0.SaturatedAdd(vec1)
+	case "SaturatedSub":
+		gotv = vec0.SaturatedSub(vec1)
+	case "Sub":
+		gotv = vec0.Sub(vec1)
+	case "Xor":
+		gotv = vec0.Xor(vec1)
+
+	default:
+		t.Errorf("Unknown method: Uint16x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint16x8BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []uint16, which string) {
+	t.Helper()
+	var gotv simd.Uint16x8
+	got := make([]uint16, len(want))
+	vec0 := simd.LoadUint16x8Slice(v0)
+	vec1 := simd.LoadUint16x8Slice(v1)
+	vec2 := simd.LoadInt16x8Slice(v2)
+	switch which {
+	case "MaskedAdd":
+		gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x8())
+	case "MaskedAverage":
+		gotv = vec0.MaskedAverage(vec1, vec2.AsMask16x8())
+	case "MaskedMax":
+		gotv = vec0.MaskedMax(vec1, vec2.AsMask16x8())
+	case "MaskedMin":
+		gotv = vec0.MaskedMin(vec1, vec2.AsMask16x8())
+	case "MaskedMulHigh":
+		gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x8())
+	case "MaskedSaturatedAdd":
+		gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x8())
+	case "MaskedSaturatedSub":
+		gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x8())
+	case "MaskedSub":
+		gotv = vec0.MaskedSub(vec1, vec2.AsMask16x8())
+
+	default:
+		t.Errorf("Unknown method: Uint16x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint16x8Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16, which string) {
+	t.Helper()
+	var gotv simd.Int16x8
+	got := make([]int16, len(want))
+	vec0 := simd.LoadUint16x8Slice(v0)
+	vec1 := simd.LoadUint16x8Slice(v1)
+	switch which {
+	case "Equal":
+		gotv = vec0.Equal(vec1).AsInt16x8()
+	case "Greater":
+		gotv = vec0.Greater(vec1).AsInt16x8()
+	case "GreaterEqual":
+		gotv = vec0.GreaterEqual(vec1).AsInt16x8()
+	case "Less":
+		gotv = vec0.Less(vec1).AsInt16x8()
+	case
"LessEqual": + gotv = vec0.LessEqual(vec1).AsInt16x8() + case "NotEqual": + gotv = vec0.NotEqual(vec1).AsInt16x8() + + default: + t.Errorf("Unknown method: Uint16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x8MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x8 + got := make([]int16, len(want)) + vec0 := simd.LoadUint16x8Slice(v0) + vec1 := simd.LoadUint16x8Slice(v1) + vec2 := simd.LoadInt16x8Slice(v2) + switch which { + case "MaskedEqual": + gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x8()).AsInt16x8() + case "MaskedGreater": + gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x8()).AsInt16x8() + case "MaskedGreaterEqual": + gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x8()).AsInt16x8() + case "MaskedLess": + gotv = vec0.MaskedLess(vec1, vec2.AsMask16x8()).AsInt16x8() + case "MaskedLessEqual": + gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x8()).AsInt16x8() + case "MaskedNotEqual": + gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x8()).AsInt16x8() + + default: + t.Errorf("Unknown method: Uint16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x8Unary(t *testing.T, v0 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x8 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x8Slice(v0) + switch which { + case "PopCount": + gotv = vec0.PopCount() + + default: + t.Errorf("Unknown method: Uint16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x8UnaryMasked(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x8 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x8Slice(v0) + vec1 := simd.LoadInt16x8Slice(v1) + switch which { + case "MaskedPopCount": + gotv = vec0.MaskedPopCount(vec1.AsMask16x8()) + + default: + t.Errorf("Unknown method: Uint16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x16Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x16 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + vec1 := simd.LoadUint16x16Slice(v1) + switch which { + case "Add": + gotv = vec0.Add(vec1) + case "And": + gotv = vec0.And(vec1) + case "AndNot": + gotv = vec0.AndNot(vec1) + case "Average": + gotv = vec0.Average(vec1) + case "Max": + gotv = vec0.Max(vec1) + case "Min": + gotv = vec0.Min(vec1) + case "MulHigh": + gotv = vec0.MulHigh(vec1) + case "Or": + gotv = vec0.Or(vec1) + case "PairwiseAdd": + gotv = vec0.PairwiseAdd(vec1) + case "PairwiseSub": + gotv = vec0.PairwiseSub(vec1) + case "SaturatedAdd": + gotv = vec0.SaturatedAdd(vec1) + case "SaturatedSub": + gotv = vec0.SaturatedSub(vec1) + case "Sub": + gotv = vec0.Sub(vec1) + case "Xor": + gotv = vec0.Xor(vec1) + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got 
%v", i, want[i], got[i]) + } + } +} + +func testUint16x16BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x16 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + vec1 := simd.LoadUint16x16Slice(v1) + vec2 := simd.LoadInt16x16Slice(v2) + switch which { + case "MaskedAdd": + gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x16()) + case "MaskedAverage": + gotv = vec0.MaskedAverage(vec1, vec2.AsMask16x16()) + case "MaskedMax": + gotv = vec0.MaskedMax(vec1, vec2.AsMask16x16()) + case "MaskedMin": + gotv = vec0.MaskedMin(vec1, vec2.AsMask16x16()) + case "MaskedMulHigh": + gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x16()) + case "MaskedSaturatedAdd": + gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x16()) + case "MaskedSaturatedSub": + gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x16()) + case "MaskedSub": + gotv = vec0.MaskedSub(vec1, vec2.AsMask16x16()) + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x16Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x16 + got := make([]int16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + vec1 := simd.LoadUint16x16Slice(v1) + switch which { + case "Equal": + gotv = vec0.Equal(vec1).AsInt16x16() + case "Greater": + gotv = vec0.Greater(vec1).AsInt16x16() + case "GreaterEqual": + gotv = vec0.GreaterEqual(vec1).AsInt16x16() + case "Less": + gotv = vec0.Less(vec1).AsInt16x16() + case "LessEqual": + gotv = vec0.LessEqual(vec1).AsInt16x16() + case "NotEqual": + gotv = vec0.NotEqual(vec1).AsInt16x16() + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x16MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x16 + got := make([]int16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + vec1 := simd.LoadUint16x16Slice(v1) + vec2 := simd.LoadInt16x16Slice(v2) + switch which { + case "MaskedEqual": + gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x16()).AsInt16x16() + case "MaskedGreater": + gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x16()).AsInt16x16() + case "MaskedGreaterEqual": + gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x16()).AsInt16x16() + case "MaskedLess": + gotv = vec0.MaskedLess(vec1, vec2.AsMask16x16()).AsInt16x16() + case "MaskedLessEqual": + gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x16()).AsInt16x16() + case "MaskedNotEqual": + gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x16()).AsInt16x16() + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x16Unary(t *testing.T, v0 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x16 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + switch which { + case "PopCount": + gotv = vec0.PopCount() + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + 
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x16UnaryMasked(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x16 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + vec1 := simd.LoadInt16x16Slice(v1) + switch which { + case "MaskedPopCount": + gotv = vec0.MaskedPopCount(vec1.AsMask16x16()) + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x32Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x32 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + vec1 := simd.LoadUint16x32Slice(v1) + switch which { + case "Add": + gotv = vec0.Add(vec1) + case "Average": + gotv = vec0.Average(vec1) + case "Max": + gotv = vec0.Max(vec1) + case "Min": + gotv = vec0.Min(vec1) + case "MulHigh": + gotv = vec0.MulHigh(vec1) + case "SaturatedAdd": + gotv = vec0.SaturatedAdd(vec1) + case "SaturatedSub": + gotv = vec0.SaturatedSub(vec1) + case "Sub": + gotv = vec0.Sub(vec1) + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x32BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x32 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + vec1 := simd.LoadUint16x32Slice(v1) + vec2 := simd.LoadInt16x32Slice(v2) + switch which { + case "MaskedAdd": + gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x32()) + case "MaskedAverage": + gotv = vec0.MaskedAverage(vec1, vec2.AsMask16x32()) + case "MaskedMax": + gotv = vec0.MaskedMax(vec1, vec2.AsMask16x32()) + case "MaskedMin": + gotv = vec0.MaskedMin(vec1, vec2.AsMask16x32()) + case "MaskedMulHigh": + gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x32()) + case "MaskedSaturatedAdd": + gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x32()) + case "MaskedSaturatedSub": + gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x32()) + case "MaskedSub": + gotv = vec0.MaskedSub(vec1, vec2.AsMask16x32()) + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x32Compare(t *testing.T, v0 []uint16, v1 []uint16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x32 + got := make([]int16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + vec1 := simd.LoadUint16x32Slice(v1) + switch which { + case "Equal": + gotv = vec0.Equal(vec1).AsInt16x32() + case "Greater": + gotv = vec0.Greater(vec1).AsInt16x32() + case "GreaterEqual": + gotv = vec0.GreaterEqual(vec1).AsInt16x32() + case "Less": + gotv = vec0.Less(vec1).AsInt16x32() + case "LessEqual": + gotv = vec0.LessEqual(vec1).AsInt16x32() + case "NotEqual": + gotv = vec0.NotEqual(vec1).AsInt16x32() + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func 
testUint16x32MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x32 + got := make([]int16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + vec1 := simd.LoadUint16x32Slice(v1) + vec2 := simd.LoadInt16x32Slice(v2) + switch which { + case "MaskedEqual": + gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x32()).AsInt16x32() + case "MaskedGreater": + gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x32()).AsInt16x32() + case "MaskedGreaterEqual": + gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x32()).AsInt16x32() + case "MaskedLess": + gotv = vec0.MaskedLess(vec1, vec2.AsMask16x32()).AsInt16x32() + case "MaskedLessEqual": + gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x32()).AsInt16x32() + case "MaskedNotEqual": + gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x32()).AsInt16x32() + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x32Unary(t *testing.T, v0 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x32 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + switch which { + case "PopCount": + gotv = vec0.PopCount() + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x32UnaryMasked(t *testing.T, v0 []uint16, v1 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x32 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + vec1 := simd.LoadInt16x32Slice(v1) + switch which { + case "MaskedPopCount": + gotv = vec0.MaskedPopCount(vec1.AsMask16x32()) + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint32x4Slice(v1) + switch which { + case "Add": + gotv = vec0.Add(vec1) + case "And": + gotv = vec0.And(vec1) + case "AndNot": + gotv = vec0.AndNot(vec1) + case "Max": + gotv = vec0.Max(vec1) + case "Min": + gotv = vec0.Min(vec1) + case "Or": + gotv = vec0.Or(vec1) + case "PairwiseAdd": + gotv = vec0.PairwiseAdd(vec1) + case "PairwiseSub": + gotv = vec0.PairwiseSub(vec1) + case "Sub": + gotv = vec0.Sub(vec1) + case "Xor": + gotv = vec0.Xor(vec1) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint32x4Slice(v1) + vec2 := simd.LoadInt32x4Slice(v2) + switch which { + case "MaskedAdd": + gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x4()) + case "MaskedAnd": + gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x4()) + case "MaskedAndNot": + gotv = 
vec0.MaskedAndNot(vec1, vec2.AsMask32x4()) + case "MaskedMax": + gotv = vec0.MaskedMax(vec1, vec2.AsMask32x4()) + case "MaskedMin": + gotv = vec0.MaskedMin(vec1, vec2.AsMask32x4()) + case "MaskedOr": + gotv = vec0.MaskedOr(vec1, vec2.AsMask32x4()) + case "MaskedSub": + gotv = vec0.MaskedSub(vec1, vec2.AsMask32x4()) + case "MaskedXor": + gotv = vec0.MaskedXor(vec1, vec2.AsMask32x4()) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4BinaryWiden(t *testing.T, v0 []uint32, v1 []uint32, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x2 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint32x4Slice(v1) + switch which { + case "MulEvenWiden": + gotv = vec0.MulEvenWiden(vec1) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x4 + got := make([]int32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint32x4Slice(v1) + switch which { + case "Equal": + gotv = vec0.Equal(vec1).AsInt32x4() + case "Greater": + gotv = vec0.Greater(vec1).AsInt32x4() + case "GreaterEqual": + gotv = vec0.GreaterEqual(vec1).AsInt32x4() + case "Less": + gotv = vec0.Less(vec1).AsInt32x4() + case "LessEqual": + gotv = vec0.LessEqual(vec1).AsInt32x4() + case "NotEqual": + gotv = vec0.NotEqual(vec1).AsInt32x4() + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x4 + got := make([]int32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint32x4Slice(v1) + vec2 := simd.LoadInt32x4Slice(v2) + switch which { + case "MaskedEqual": + gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x4()).AsInt32x4() + case "MaskedGreater": + gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x4()).AsInt32x4() + case "MaskedGreaterEqual": + gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x4()).AsInt32x4() + case "MaskedLess": + gotv = vec0.MaskedLess(vec1, vec2.AsMask32x4()).AsInt32x4() + case "MaskedLessEqual": + gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x4()).AsInt32x4() + case "MaskedNotEqual": + gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x4()).AsInt32x4() + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4Uint8x16Int8x16Mask32x4Uint32x4(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint8x16Slice(v1) + vec2 := simd.LoadInt8x16Slice(v2) + vec3 := simd.LoadInt32x4Slice(v3) + switch which { + case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": + gotv = 
vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x4()) + case "MaskedUnsignedSignedQuadDotProdAccumulate": + gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x4()) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4Uint8x16Int8x16Uint32x4(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint8x16Slice(v1) + vec2 := simd.LoadInt8x16Slice(v2) + switch which { + case "SaturatedUnsignedSignedQuadDotProdAccumulate": + gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2) + case "UnsignedSignedQuadDotProdAccumulate": + gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4Unary(t *testing.T, v0 []uint32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + switch which { + case "PopCount": + gotv = vec0.PopCount() + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4UnaryMasked(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadInt32x4Slice(v1) + switch which { + case "MaskedPopCount": + gotv = vec0.MaskedPopCount(vec1.AsMask32x4()) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x8Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x8 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadUint32x8Slice(v1) + switch which { + case "Add": + gotv = vec0.Add(vec1) + case "And": + gotv = vec0.And(vec1) + case "AndNot": + gotv = vec0.AndNot(vec1) + case "Max": + gotv = vec0.Max(vec1) + case "Min": + gotv = vec0.Min(vec1) + case "Or": + gotv = vec0.Or(vec1) + case "PairwiseAdd": + gotv = vec0.PairwiseAdd(vec1) + case "PairwiseSub": + gotv = vec0.PairwiseSub(vec1) + case "Sub": + gotv = vec0.Sub(vec1) + case "Xor": + gotv = vec0.Xor(vec1) + + default: + t.Errorf("Unknown method: Uint32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x8BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x8 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadUint32x8Slice(v1) + vec2 := simd.LoadInt32x8Slice(v2) + switch which { + case "MaskedAdd": + gotv = 
vec0.MaskedAdd(vec1, vec2.AsMask32x8()) + case "MaskedAnd": + gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x8()) + case "MaskedAndNot": + gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x8()) + case "MaskedMax": + gotv = vec0.MaskedMax(vec1, vec2.AsMask32x8()) + case "MaskedMin": + gotv = vec0.MaskedMin(vec1, vec2.AsMask32x8()) + case "MaskedOr": + gotv = vec0.MaskedOr(vec1, vec2.AsMask32x8()) + case "MaskedSub": + gotv = vec0.MaskedSub(vec1, vec2.AsMask32x8()) + case "MaskedXor": + gotv = vec0.MaskedXor(vec1, vec2.AsMask32x8()) + + default: + t.Errorf("Unknown method: Uint32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x8BinaryWiden(t *testing.T, v0 []uint32, v1 []uint32, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x4 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadUint32x8Slice(v1) + switch which { + case "MulEvenWiden": + gotv = vec0.MulEvenWiden(vec1) + + default: + t.Errorf("Unknown method: Uint32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x8Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x8 + got := make([]int32, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadUint32x8Slice(v1) + switch which { + case "Equal": + gotv = vec0.Equal(vec1).AsInt32x8() + case "Greater": + gotv = vec0.Greater(vec1).AsInt32x8() + case "GreaterEqual": + gotv = vec0.GreaterEqual(vec1).AsInt32x8() + case "Less": + gotv = vec0.Less(vec1).AsInt32x8() + case "LessEqual": + gotv = vec0.LessEqual(vec1).AsInt32x8() + case "NotEqual": + gotv = vec0.NotEqual(vec1).AsInt32x8() + + default: + t.Errorf("Unknown method: Uint32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x8MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x8 + got := make([]int32, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadUint32x8Slice(v1) + vec2 := simd.LoadInt32x8Slice(v2) + switch which { + case "MaskedEqual": + gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x8()).AsInt32x8() + case "MaskedGreater": + gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x8()).AsInt32x8() + case "MaskedGreaterEqual": + gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x8()).AsInt32x8() + case "MaskedLess": + gotv = vec0.MaskedLess(vec1, vec2.AsMask32x8()).AsInt32x8() + case "MaskedLessEqual": + gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x8()).AsInt32x8() + case "MaskedNotEqual": + gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x8()).AsInt32x8() + + default: + t.Errorf("Unknown method: Uint32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x8Uint8x32Int8x32Mask32x8Uint32x8(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x8 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadUint8x32Slice(v1) + vec2 := 
simd.LoadInt8x32Slice(v2)
+	vec3 := simd.LoadInt32x8Slice(v3)
+	switch which {
+	case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate":
+		gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x8())
+	case "MaskedUnsignedSignedQuadDotProdAccumulate":
+		gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x8())
+
+	default:
+		t.Errorf("Unknown method: Uint32x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint32x8Uint8x32Int8x32Uint32x8(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) {
+	t.Helper()
+	var gotv simd.Uint32x8
+	got := make([]uint32, len(want))
+	vec0 := simd.LoadUint32x8Slice(v0)
+	vec1 := simd.LoadUint8x32Slice(v1)
+	vec2 := simd.LoadInt8x32Slice(v2)
+	switch which {
+	case "SaturatedUnsignedSignedQuadDotProdAccumulate":
+		gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
+	case "UnsignedSignedQuadDotProdAccumulate":
+		gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
+
+	default:
+		t.Errorf("Unknown method: Uint32x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint32x8Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
+	t.Helper()
+	var gotv simd.Uint32x8
+	got := make([]uint32, len(want))
+	vec0 := simd.LoadUint32x8Slice(v0)
+	switch which {
+	case "PopCount":
+		gotv = vec0.PopCount()
+
+	default:
+		t.Errorf("Unknown method: Uint32x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint32x8UnaryMasked(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) {
+	t.Helper()
+	var gotv simd.Uint32x8
+	got := make([]uint32, len(want))
+	vec0 := simd.LoadUint32x8Slice(v0)
+	vec1 := simd.LoadInt32x8Slice(v1)
+	switch which {
+	case "MaskedPopCount":
+		gotv = vec0.MaskedPopCount(vec1.AsMask32x8())
+
+	default:
+		t.Errorf("Unknown method: Uint32x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint32x16Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, which string) {
+	t.Helper()
+	var gotv simd.Uint32x16
+	got := make([]uint32, len(want))
+	vec0 := simd.LoadUint32x16Slice(v0)
+	vec1 := simd.LoadUint32x16Slice(v1)
+	switch which {
+	case "Add":
+		gotv = vec0.Add(vec1)
+	case "And":
+		gotv = vec0.And(vec1)
+	case "AndNot":
+		gotv = vec0.AndNot(vec1)
+	case "Max":
+		gotv = vec0.Max(vec1)
+	case "Min":
+		gotv = vec0.Min(vec1)
+	case "Or":
+		gotv = vec0.Or(vec1)
+	case "Sub":
+		gotv = vec0.Sub(vec1)
+	case "Xor":
+		gotv = vec0.Xor(vec1)
+
+	default:
+		t.Errorf("Unknown method: Uint32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint32x16BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []uint32, which string) {
+	t.Helper()
+	var gotv simd.Uint32x16
+	got := make([]uint32, len(want))
+	vec0 := simd.LoadUint32x16Slice(v0)
+	vec1 := simd.LoadUint32x16Slice(v1)
+	vec2 := simd.LoadInt32x16Slice(v2)
+	switch which {
+	case "MaskedAdd":
+		gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x16())
+	case "MaskedAnd":
+		gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x16())
+	case "MaskedAndNot":
+		gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x16())
+	case "MaskedMax":
+		gotv = vec0.MaskedMax(vec1, vec2.AsMask32x16())
+	case "MaskedMin":
+		gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16())
+	case "MaskedOr":
+		gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16())
+	case "MaskedSub":
+		gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16())
+	case "MaskedXor":
+		gotv = vec0.MaskedXor(vec1, vec2.AsMask32x16())
+
+	default:
+		t.Errorf("Unknown method: Uint32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint32x16Compare(t *testing.T, v0 []uint32, v1 []uint32, want []int32, which string) {
+	t.Helper()
+	var gotv simd.Int32x16
+	got := make([]int32, len(want))
+	vec0 := simd.LoadUint32x16Slice(v0)
+	vec1 := simd.LoadUint32x16Slice(v1)
+	switch which {
+	case "Equal":
+		gotv = vec0.Equal(vec1).AsInt32x16()
+	case "Greater":
+		gotv = vec0.Greater(vec1).AsInt32x16()
+	case "GreaterEqual":
+		gotv = vec0.GreaterEqual(vec1).AsInt32x16()
+	case "Less":
+		gotv = vec0.Less(vec1).AsInt32x16()
+	case "LessEqual":
+		gotv = vec0.LessEqual(vec1).AsInt32x16()
+	case "NotEqual":
+		gotv = vec0.NotEqual(vec1).AsInt32x16()
+
+	default:
+		t.Errorf("Unknown method: Uint32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint32x16MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32, want []int32, which string) {
+	t.Helper()
+	var gotv simd.Int32x16
+	got := make([]int32, len(want))
+	vec0 := simd.LoadUint32x16Slice(v0)
+	vec1 := simd.LoadUint32x16Slice(v1)
+	vec2 := simd.LoadInt32x16Slice(v2)
+	switch which {
+	case "MaskedEqual":
+		gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x16()).AsInt32x16()
+	case "MaskedGreater":
+		gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x16()).AsInt32x16()
+	case "MaskedGreaterEqual":
+		gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x16()).AsInt32x16()
+	case "MaskedLess":
+		gotv = vec0.MaskedLess(vec1, vec2.AsMask32x16()).AsInt32x16()
+	case "MaskedLessEqual":
+		gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x16()).AsInt32x16()
+	case "MaskedNotEqual":
+		gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x16()).AsInt32x16()
+
+	default:
+		t.Errorf("Unknown method: Uint32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
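+// Illustrative sketch (editorial addition, not generator output): a
+// hand-written test could drive the compare helpers above as below,
+// assuming a true lane materializes as all-ones (-1) when the mask is
+// viewed as int32 lanes; the inputs and test name are hypothetical.
+//
+//	func TestUint32x16LessExample(t *testing.T) {
+//		v0 := []uint32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+//		v1 := []uint32{8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}
+//		want := make([]int32, 16)
+//		for i := range want {
+//			if v0[i] < v1[i] {
+//				want[i] = -1 // all bits set in true lanes
+//			}
+//		}
+//		testUint32x16Compare(t, v0, v1, want, "Less")
+//	}
+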
+func testUint32x16Uint8x64Int8x64Mask32x16Uint32x16(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) {
+	t.Helper()
+	var gotv simd.Uint32x16
+	got := make([]uint32, len(want))
+	vec0 := simd.LoadUint32x16Slice(v0)
+	vec1 := simd.LoadUint8x64Slice(v1)
+	vec2 := simd.LoadInt8x64Slice(v2)
+	vec3 := simd.LoadInt32x16Slice(v3)
+	switch which {
+	case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate":
+		gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16())
+	case "MaskedUnsignedSignedQuadDotProdAccumulate":
+		gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16())
+
+	default:
+		t.Errorf("Unknown method: Uint32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint32x16Uint8x64Int8x64Uint32x16(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) {
+	t.Helper()
+	var gotv simd.Uint32x16
+	got := make([]uint32, len(want))
+	vec0 := simd.LoadUint32x16Slice(v0)
+	vec1 := simd.LoadUint8x64Slice(v1)
+	vec2 := simd.LoadInt8x64Slice(v2)
+	switch which {
+	case "SaturatedUnsignedSignedQuadDotProdAccumulate":
+		gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
+	case "UnsignedSignedQuadDotProdAccumulate":
+		gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
+
+	default:
+		t.Errorf("Unknown method: Uint32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint32x16Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
+	t.Helper()
+	var gotv simd.Uint32x16
+	got := make([]uint32, len(want))
+	vec0 := simd.LoadUint32x16Slice(v0)
+	switch which {
+	case "PopCount":
+		gotv = vec0.PopCount()
+
+	default:
+		t.Errorf("Unknown method: Uint32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint32x16UnaryMasked(t *testing.T, v0 []uint32, v1 []int32, want []uint32, which string) {
+	t.Helper()
+	var gotv simd.Uint32x16
+	got := make([]uint32, len(want))
+	vec0 := simd.LoadUint32x16Slice(v0)
+	vec1 := simd.LoadInt32x16Slice(v1)
+	switch which {
+	case "MaskedPopCount":
+		gotv = vec0.MaskedPopCount(vec1.AsMask32x16())
+
+	default:
+		t.Errorf("Unknown method: Uint32x16.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x2Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x2
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x2Slice(v0)
+	vec1 := simd.LoadUint64x2Slice(v1)
+	switch which {
+	case "Add":
+		gotv = vec0.Add(vec1)
+	case "And":
+		gotv = vec0.And(vec1)
+	case "AndNot":
+		gotv = vec0.AndNot(vec1)
+	case "Max":
+		gotv = vec0.Max(vec1)
+	case "Min":
+		gotv = vec0.Min(vec1)
+	case "MulEvenWiden":
+		gotv = vec0.MulEvenWiden(vec1)
+	case "Or":
+		gotv = vec0.Or(vec1)
+	case "Sub":
+		gotv = vec0.Sub(vec1)
+	case "Xor":
+		gotv = vec0.Xor(vec1)
+
+	default:
+		t.Errorf("Unknown method: Uint64x2.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x2BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x2
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x2Slice(v0)
+	vec1 := simd.LoadUint64x2Slice(v1)
+	vec2 := simd.LoadInt64x2Slice(v2)
+	switch which {
+	case "MaskedAdd":
+		gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x2())
+	case "MaskedAnd":
+		gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x2())
+	case "MaskedAndNot":
+		gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x2())
+	case "MaskedMax":
+		gotv = vec0.MaskedMax(vec1, vec2.AsMask64x2())
+	case "MaskedMin":
+		gotv = vec0.MaskedMin(vec1, vec2.AsMask64x2())
+	case "MaskedMulEvenWiden":
+		gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x2())
+	case "MaskedOr":
+		gotv = vec0.MaskedOr(vec1, vec2.AsMask64x2())
+	case "MaskedSub":
+		gotv = vec0.MaskedSub(vec1, vec2.AsMask64x2())
+	case "MaskedXor":
+		gotv = vec0.MaskedXor(vec1, vec2.AsMask64x2())
+
+	default:
+		t.Errorf("Unknown method: Uint64x2.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x2Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, which string) {
+	t.Helper()
+	var gotv simd.Int64x2
+	got := make([]int64, len(want))
+	vec0 := simd.LoadUint64x2Slice(v0)
+	vec1 := simd.LoadUint64x2Slice(v1)
+	switch which {
+	case "Equal":
+		gotv = vec0.Equal(vec1).AsInt64x2()
+	case "Greater":
+		gotv = vec0.Greater(vec1).AsInt64x2()
+	case "GreaterEqual":
+		gotv = vec0.GreaterEqual(vec1).AsInt64x2()
+	case "Less":
+		gotv = vec0.Less(vec1).AsInt64x2()
+	case "LessEqual":
+		gotv = vec0.LessEqual(vec1).AsInt64x2()
+	case "NotEqual":
+		gotv = vec0.NotEqual(vec1).AsInt64x2()
+
+	default:
+		t.Errorf("Unknown method: Uint64x2.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x2MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) {
+	t.Helper()
+	var gotv simd.Int64x2
+	got := make([]int64, len(want))
+	vec0 := simd.LoadUint64x2Slice(v0)
+	vec1 := simd.LoadUint64x2Slice(v1)
+	vec2 := simd.LoadInt64x2Slice(v2)
+	switch which {
+	case "MaskedEqual":
+		gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x2()).AsInt64x2()
+	case "MaskedGreater":
+		gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x2()).AsInt64x2()
+	case "MaskedGreaterEqual":
+		gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x2()).AsInt64x2()
+	case "MaskedLess":
+		gotv = vec0.MaskedLess(vec1, vec2.AsMask64x2()).AsInt64x2()
+	case "MaskedLessEqual":
+		gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x2()).AsInt64x2()
+	case "MaskedNotEqual":
+		gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x2()).AsInt64x2()
+
+	default:
+		t.Errorf("Unknown method: Uint64x2.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x2Unary(t *testing.T, v0 []uint64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x2
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x2Slice(v0)
+	switch which {
+	case "PopCount":
+		gotv = vec0.PopCount()
+
+	default:
+		t.Errorf("Unknown method: Uint64x2.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x2UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x2
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x2Slice(v0)
+	vec1 := simd.LoadInt64x2Slice(v1)
+	switch which {
+	case "MaskedPopCount":
+		gotv = vec0.MaskedPopCount(vec1.AsMask64x2())
+
+	default:
+		t.Errorf("Unknown method: Uint64x2.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
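+// Illustrative sketch (editorial addition, not generator output): the
+// 128-bit Uint64x2 helpers above operate on two-lane slices, so a
+// minimal hand-written driver, with hypothetical inputs and test name,
+// could read:
+//
+//	func TestUint64x2AddExample(t *testing.T) {
+//		v0 := []uint64{1, 2}
+//		v1 := []uint64{10, 20}
+//		want := []uint64{11, 22} // lane-wise: want[i] = v0[i] + v1[i]
+//		testUint64x2Binary(t, v0, v1, want, "Add")
+//	}
+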
+func testUint64x4Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x4
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x4Slice(v0)
+	vec1 := simd.LoadUint64x4Slice(v1)
+	switch which {
+	case "Add":
+		gotv = vec0.Add(vec1)
+	case "And":
+		gotv = vec0.And(vec1)
+	case "AndNot":
+		gotv = vec0.AndNot(vec1)
+	case "Max":
+		gotv = vec0.Max(vec1)
+	case "Min":
+		gotv = vec0.Min(vec1)
+	case "MulEvenWiden":
+		gotv = vec0.MulEvenWiden(vec1)
+	case "Or":
+		gotv = vec0.Or(vec1)
+	case "Sub":
+		gotv = vec0.Sub(vec1)
+	case "Xor":
+		gotv = vec0.Xor(vec1)
+
+	default:
+		t.Errorf("Unknown method: Uint64x4.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x4BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x4
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x4Slice(v0)
+	vec1 := simd.LoadUint64x4Slice(v1)
+	vec2 := simd.LoadInt64x4Slice(v2)
+	switch which {
+	case "MaskedAdd":
+		gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x4())
+	case "MaskedAnd":
+		gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x4())
+	case "MaskedAndNot":
+		gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x4())
+	case "MaskedMax":
+		gotv = vec0.MaskedMax(vec1, vec2.AsMask64x4())
+	case "MaskedMin":
+		gotv = vec0.MaskedMin(vec1, vec2.AsMask64x4())
+	case "MaskedMulEvenWiden":
+		gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x4())
+	case "MaskedOr":
+		gotv = vec0.MaskedOr(vec1, vec2.AsMask64x4())
+	case "MaskedSub":
+		gotv = vec0.MaskedSub(vec1, vec2.AsMask64x4())
+	case "MaskedXor":
+		gotv = vec0.MaskedXor(vec1, vec2.AsMask64x4())
+
+	default:
+		t.Errorf("Unknown method: Uint64x4.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x4Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, which string) {
+	t.Helper()
+	var gotv simd.Int64x4
+	got := make([]int64, len(want))
+	vec0 := simd.LoadUint64x4Slice(v0)
+	vec1 := simd.LoadUint64x4Slice(v1)
+	switch which {
+	case "Equal":
+		gotv = vec0.Equal(vec1).AsInt64x4()
+	case "Greater":
+		gotv = vec0.Greater(vec1).AsInt64x4()
+	case "GreaterEqual":
+		gotv = vec0.GreaterEqual(vec1).AsInt64x4()
+	case "Less":
+		gotv = vec0.Less(vec1).AsInt64x4()
+	case "LessEqual":
+		gotv = vec0.LessEqual(vec1).AsInt64x4()
+	case "NotEqual":
+		gotv = vec0.NotEqual(vec1).AsInt64x4()
+
+	default:
+		t.Errorf("Unknown method: Uint64x4.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x4MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) {
+	t.Helper()
+	var gotv simd.Int64x4
+	got := make([]int64, len(want))
+	vec0 := simd.LoadUint64x4Slice(v0)
+	vec1 := simd.LoadUint64x4Slice(v1)
+	vec2 := simd.LoadInt64x4Slice(v2)
+	switch which {
+	case "MaskedEqual":
+		gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x4()).AsInt64x4()
+	case "MaskedGreater":
+		gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x4()).AsInt64x4()
+	case "MaskedGreaterEqual":
+		gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x4()).AsInt64x4()
+	case "MaskedLess":
+		gotv = vec0.MaskedLess(vec1, vec2.AsMask64x4()).AsInt64x4()
+	case "MaskedLessEqual":
+		gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x4()).AsInt64x4()
+	case "MaskedNotEqual":
+		gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x4()).AsInt64x4()
+
+	default:
+		t.Errorf("Unknown method: Uint64x4.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x4Unary(t *testing.T, v0 []uint64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x4
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x4Slice(v0)
+	switch which {
+	case "PopCount":
+		gotv = vec0.PopCount()
+
+	default:
+		t.Errorf("Unknown method: Uint64x4.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x4UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x4
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x4Slice(v0)
+	vec1 := simd.LoadInt64x4Slice(v1)
+	switch which {
+	case "MaskedPopCount":
+		gotv = vec0.MaskedPopCount(vec1.AsMask64x4())
+
+	default:
+		t.Errorf("Unknown method: Uint64x4.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x8Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x8
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x8Slice(v0)
+	vec1 := simd.LoadUint64x8Slice(v1)
+	switch which {
+	case "Add":
+		gotv = vec0.Add(vec1)
+	case "And":
+		gotv = vec0.And(vec1)
+	case "AndNot":
+		gotv = vec0.AndNot(vec1)
+	case "Max":
+		gotv = vec0.Max(vec1)
+	case "Min":
+		gotv = vec0.Min(vec1)
+	case "MulEvenWiden":
+		gotv = vec0.MulEvenWiden(vec1)
+	case "Or":
+		gotv = vec0.Or(vec1)
+	case "Sub":
+		gotv = vec0.Sub(vec1)
+	case "Xor":
+		gotv = vec0.Xor(vec1)
+
+	default:
+		t.Errorf("Unknown method: Uint64x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x8BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x8
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x8Slice(v0)
+	vec1 := simd.LoadUint64x8Slice(v1)
+	vec2 := simd.LoadInt64x8Slice(v2)
+	switch which {
+	case "MaskedAdd":
+		gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x8())
+	case "MaskedAnd":
+		gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x8())
+	case "MaskedAndNot":
+		gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x8())
+	case "MaskedMax":
+		gotv = vec0.MaskedMax(vec1, vec2.AsMask64x8())
+	case "MaskedMin":
+		gotv = vec0.MaskedMin(vec1, vec2.AsMask64x8())
+	case "MaskedMulEvenWiden":
+		gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x8())
+	case "MaskedOr":
+		gotv = vec0.MaskedOr(vec1, vec2.AsMask64x8())
+	case "MaskedSub":
+		gotv = vec0.MaskedSub(vec1, vec2.AsMask64x8())
+	case "MaskedXor":
+		gotv = vec0.MaskedXor(vec1, vec2.AsMask64x8())
+
+	default:
+		t.Errorf("Unknown method: Uint64x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x8Compare(t *testing.T, v0 []uint64, v1 []uint64, want []int64, which string) {
+	t.Helper()
+	var gotv simd.Int64x8
+	got := make([]int64, len(want))
+	vec0 := simd.LoadUint64x8Slice(v0)
+	vec1 := simd.LoadUint64x8Slice(v1)
+	switch which {
+	case "Equal":
+		gotv = vec0.Equal(vec1).AsInt64x8()
+	case "Greater":
+		gotv = vec0.Greater(vec1).AsInt64x8()
+	case "GreaterEqual":
+		gotv = vec0.GreaterEqual(vec1).AsInt64x8()
+	case "Less":
+		gotv = vec0.Less(vec1).AsInt64x8()
+	case "LessEqual":
+		gotv = vec0.LessEqual(vec1).AsInt64x8()
+	case "NotEqual":
+		gotv = vec0.NotEqual(vec1).AsInt64x8()
+
+	default:
+		t.Errorf("Unknown method: Uint64x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x8MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64, want []int64, which string) {
+	t.Helper()
+	var gotv simd.Int64x8
+	got := make([]int64, len(want))
+	vec0 := simd.LoadUint64x8Slice(v0)
+	vec1 := simd.LoadUint64x8Slice(v1)
+	vec2 := simd.LoadInt64x8Slice(v2)
+	switch which {
+	case "MaskedEqual":
+		gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x8()).AsInt64x8()
+	case "MaskedGreater":
+		gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x8()).AsInt64x8()
+	case "MaskedGreaterEqual":
+		gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x8()).AsInt64x8()
+	case "MaskedLess":
+		gotv = vec0.MaskedLess(vec1, vec2.AsMask64x8()).AsInt64x8()
+	case "MaskedLessEqual":
+		gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x8()).AsInt64x8()
+	case "MaskedNotEqual":
+		gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x8()).AsInt64x8()
+
+	default:
+		t.Errorf("Unknown method: Uint64x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x8Unary(t *testing.T, v0 []uint64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x8
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x8Slice(v0)
+	switch which {
+	case "PopCount":
+		gotv = vec0.PopCount()
+
+	default:
+		t.Errorf("Unknown method: Uint64x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
+
+func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint64, which string) {
+	t.Helper()
+	var gotv simd.Uint64x8
+	got := make([]uint64, len(want))
+	vec0 := simd.LoadUint64x8Slice(v0)
+	vec1 := simd.LoadInt64x8Slice(v1)
+	switch which {
+	case "MaskedPopCount":
+		gotv = vec0.MaskedPopCount(vec1.AsMask64x8())
+
+	default:
+		t.Errorf("Unknown method: Uint64x8.%s", which)
+	}
+	gotv.StoreSlice(got)
+	for i := range len(want) {
+		if got[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
+		}
+	}
+}
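+
+// Illustrative sketch (editorial addition, not generator output): for the
+// unary helpers, the reference result is computed per lane; PopCount, for
+// example, matches math/bits.OnesCount64. The inputs and test name are
+// hypothetical.
+//
+//	func TestUint64x8PopCountExample(t *testing.T) {
+//		v0 := []uint64{0, 1, 3, 255, 1 << 63, ^uint64(0), 0xF0F0, 7}
+//		want := make([]uint64, 8)
+//		for i := range v0 {
+//			want[i] = uint64(bits.OnesCount64(v0[i])) // import "math/bits"
+//		}
+//		testUint64x8Unary(t, v0, want, "PopCount")
+//	}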