diff --git a/src/simd/_gen/simdgen/ops/Moves/categories.yaml b/src/simd/_gen/simdgen/ops/Moves/categories.yaml index 0c733e12ee1..49006f8801f 100644 --- a/src/simd/_gen/simdgen/ops/Moves/categories.yaml +++ b/src/simd/_gen/simdgen/ops/Moves/categories.yaml @@ -129,7 +129,7 @@ // halves of the output. The selection is chosen by the constant parameter h1h0l1l0 // where each {h,l}{1,0} is two bits specify which element from y or x to select. // For example, {0,1,2,3}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7}) returns - // {2, 1, 4, 6} (don't forget that the binary constant is written big-endian). + // {2, 0, 5, 7} (don't forget that the binary constant is written big-endian). - go: concatSelectedConstant commutative: false @@ -139,7 +139,7 @@ // NAME concatenates selected elements from x and y into the lower and upper // halves of the output. The selection is chosen by the constant parameter hilo // where hi and lo are each one bit specifying which 64-bit element to select - // from y and x. For example {4,5}.concatSelectedConstant(0b10, {6,7}) + // from y and x. For example {4,5}.NAME(0b10, {6,7}) // returns {4,7}; bit 0, selecting from x, is zero, and selects 4, and bit 1, // selecting from y, is 1, and selects 7. @@ -147,13 +147,14 @@ commutative: false out: - elemBits: 32 + bits: 256 documentation: !string |- // NAME concatenates selected elements from 128-bit subvectors of x and y // into the lower and upper halves of corresponding subvectors of the output. // The selection is chosen by the constant parameter h1h0l1l0 - // where each {h,l}{1,0} is two bits specify which element from y or x to select. + // where each {h,l}{1,0} is two bits specifying which element from y or x to select. // For example, - // {0,1,2,3,8,9,10,11}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) + // {0,1,2,3,8,9,10,11}.NAME(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) // returns {2,0,5,7,10,8,13,15} // (don't forget that the binary constant is written big-endian). 
@@ -161,6 +162,7 @@ commutative: false out: - elemBits: 64 + bits: 256 documentation: !string |- // NAME concatenates selected elements from 128-bit subvectors of x and y // into the lower and upper halves of corresponding subvectors of the output. @@ -168,7 +170,7 @@ // hi and lo pair select 64-bit elements from the corresponding 128-bit // subvectors of x and y. // - // For example {4,5,8,9}.concatSelectedConstant(0b_11_10, {6,7,10,11}) + // For example {4,5,8,9}.NAME(0b_11_10, {6,7,10,11}) // returns {4,7,9,11}; bit 0 is zero, selecting element 0 from x's least // 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), // then 1, selecting element 1 from x's upper 128 bits (9), then 1, @@ -176,6 +178,44 @@ // This differs from the same method applied to a 32x8 vector, where // the 8-bit constant performs the same selection on both subvectors. +- go: concatSelectedConstantGrouped + commutative: false + out: + - elemBits: 32 + bits: 512 + documentation: !string |- + // NAME concatenates selected elements from 128-bit subvectors of x and y + // into the lower and upper halves of corresponding subvectors of the output. + // The selection is chosen by the constant parameter h1h0l1l0 + // where each {h,l}{1,0} is two bits specifying which element from y or x to select. + // For example, + // {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.NAME( + // 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215}) + // returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215} + // (don't forget that the binary constant is written big-endian). + +- go: concatSelectedConstantGrouped + commutative: false + out: + - elemBits: 64 + bits: 512 + documentation: !string |- + // NAME concatenates selected elements from 128-bit subvectors of x and y + // into the lower and upper halves of corresponding subvectors of the output. 
+ // The selections are specified by the constant parameter hilos where each + // hi and lo pair select 64-bit elements from the corresponding 128-bit + // subvectors of x and y. + // + // For example {4,5,8,9,12,13,16,17}.NAME(0b11_00_11_10, {6,7,10,11,14,15,18,19}) + // returns {4,7,9,11,12,14,17,19}; bit 0 is zero, selecting element 0 from x's + // least 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), + // then 1, selecting element 1 from x's next 128 bits (9), then 1, + // selecting element 1 from y's next 128 bits (11). The next two 0 bits select + // the lower elements from x and y's 3rd 128 bit groups (12, 14), the last two + // 1 bits select the upper elements from x and y's last 128 bits (17, 19). + // This differs from the same method applied to a 32x8 or 32x16 vector, where + // the 8-bit constant performs the same selection on all the subvectors. + - go: Select128FromPair commutative: false documentation: !string |- diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 91e7d91842a..17f45e6bf5e 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -7431,7 +7431,7 @@ func (x Int64x8) blendMasked(y Int64x8, mask Mask64x8) Int64x8 // halves of the output. The selection is chosen by the constant parameter h1h0l1l0 // where each {h,l}{1,0} is two bits specify which element from y or x to select. // For example, {0,1,2,3}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7}) returns -// {2, 1, 4, 6} (don't forget that the binary constant is written big-endian). +// {2, 0, 5, 7} (don't forget that the binary constant is written big-endian). // // h1h0l1l0 results in better performance when it's a constant, a non-constant value will be translated into a jump table. // @@ -7454,7 +7454,7 @@ func (x Float64x2) concatSelectedConstant(hilo uint8, y Float64x2) Float64x2 // halves of the output. 
The selection is chosen by the constant parameter h1h0l1l0 // where each {h,l}{1,0} is two bits specify which element from y or x to select. // For example, {0,1,2,3}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7}) returns -// {2, 1, 4, 6} (don't forget that the binary constant is written big-endian). +// {2, 0, 5, 7} (don't forget that the binary constant is written big-endian). // // h1h0l1l0 results in better performance when it's a constant, a non-constant value will be translated into a jump table. // @@ -7477,7 +7477,7 @@ func (x Int64x2) concatSelectedConstant(hilo uint8, y Int64x2) Int64x2 // halves of the output. The selection is chosen by the constant parameter h1h0l1l0 // where each {h,l}{1,0} is two bits specify which element from y or x to select. // For example, {0,1,2,3}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7}) returns -// {2, 1, 4, 6} (don't forget that the binary constant is written big-endian). +// {2, 0, 5, 7} (don't forget that the binary constant is written big-endian). // // h1h0l1l0 results in better performance when it's a constant, a non-constant value will be translated into a jump table. // @@ -7501,9 +7501,9 @@ func (x Uint64x2) concatSelectedConstant(hilo uint8, y Uint64x2) Uint64x2 // concatSelectedConstantGrouped concatenates selected elements from 128-bit subvectors of x and y // into the lower and upper halves of corresponding subvectors of the output. // The selection is chosen by the constant parameter h1h0l1l0 -// where each {h,l}{1,0} is two bits specify which element from y or x to select. +// where each {h,l}{1,0} is two bits specifying which element from y or x to select. // For example, -// {0,1,2,3,8,9,10,11}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) +// {0,1,2,3,8,9,10,11}.concatSelectedConstantGrouped(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) // returns {2,0,5,7,10,8,13,15} // (don't forget that the binary constant is written big-endian). 
// @@ -7515,10 +7515,13 @@ func (x Float32x8) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Float32x8) Fl // concatSelectedConstantGrouped concatenates selected elements from 128-bit subvectors of x and y // into the lower and upper halves of corresponding subvectors of the output. // The selection is chosen by the constant parameter h1h0l1l0 -// where each {h,l}{1,0} is two bits specify which element from y or x to select. +// where each {h,l}{1,0} is two bits specifying which element from y or x to select. // For example, -// {0,1,2,3,8,9,10,11}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) -// returns {2,0,5,7,10,8,13,15} +// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped( +// +// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215}) +// +// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215} // (don't forget that the binary constant is written big-endian). // // h1h0l1l0 results in better performance when it's a constant, a non-constant value will be translated into a jump table. @@ -7532,7 +7535,7 @@ func (x Float32x16) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Float32x16) // hi and lo pair select 64-bit elements from the corresponding 128-bit // subvectors of x and y. // -// For example {4,5,8,9}.concatSelectedConstant(0b_11_10, {6,7,10,11}) +// For example {4,5,8,9}.concatSelectedConstantGrouped(0b_11_10, {6,7,10,11}) // returns {4,7,9,11}; bit 0 is zero, selecting element 0 from x's least // 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), // then 1, selecting element 1 from x's upper 128 bits (9), then 1, @@ -7551,13 +7554,15 @@ func (x Float64x4) concatSelectedConstantGrouped(hilos uint8, y Float64x4) Float // hi and lo pair select 64-bit elements from the corresponding 128-bit // subvectors of x and y. 
// -// For example {4,5,8,9}.concatSelectedConstant(0b_11_10, {6,7,10,11}) -// returns {4,7,9,11}; bit 0 is zero, selecting element 0 from x's least -// 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), -// then 1, selecting element 1 from x's upper 128 bits (9), then 1, -// selecting element 1 from y's upper 128 bits (11). -// This differs from the same method applied to a 32x8 vector, where -// the 8-bit constant performs the same selection on both subvectors. +// For example {4,5,8,9,12,13,16,17}.concatSelectedConstantGrouped(0b11_00_11_10, {6,7,10,11,14,15,18,19}) +// returns {4,7,9,11,12,14,17,19}; bit 0 is zero, selecting element 0 from x's +// least 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), +// then 1, selecting element 1 from x's next 128 bits (9), then 1, +// selecting element 1 from y's next 128 bits (11). The next two 0 bits select +// the lower elements from x and y's 3rd 128 bit groups (12, 14), the last two +// 1 bits select the upper elements from x and y's last 128 bits (17, 19). +// This differs from the same method applied to a 32x8 or 32x16 vector, where +// the 8-bit constant performs the same selection on all the subvectors. // // hilos results in better performance when it's a constant, a non-constant value will be translated into a jump table. // @@ -7567,9 +7572,9 @@ func (x Float64x8) concatSelectedConstantGrouped(hilos uint8, y Float64x8) Float // concatSelectedConstantGrouped concatenates selected elements from 128-bit subvectors of x and y // into the lower and upper halves of corresponding subvectors of the output. // The selection is chosen by the constant parameter h1h0l1l0 -// where each {h,l}{1,0} is two bits specify which element from y or x to select. +// where each {h,l}{1,0} is two bits specifying which element from y or x to select. 
// For example, -// {0,1,2,3,8,9,10,11}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) +// {0,1,2,3,8,9,10,11}.concatSelectedConstantGrouped(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) // returns {2,0,5,7,10,8,13,15} // (don't forget that the binary constant is written big-endian). // @@ -7581,10 +7586,13 @@ func (x Int32x8) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Int32x8) Int32x // concatSelectedConstantGrouped concatenates selected elements from 128-bit subvectors of x and y // into the lower and upper halves of corresponding subvectors of the output. // The selection is chosen by the constant parameter h1h0l1l0 -// where each {h,l}{1,0} is two bits specify which element from y or x to select. +// where each {h,l}{1,0} is two bits specifying which element from y or x to select. // For example, -// {0,1,2,3,8,9,10,11}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) -// returns {2,0,5,7,10,8,13,15} +// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped( +// +// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215}) +// +// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215} // (don't forget that the binary constant is written big-endian). // // h1h0l1l0 results in better performance when it's a constant, a non-constant value will be translated into a jump table. @@ -7598,7 +7606,7 @@ func (x Int32x16) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Int32x16) Int3 // hi and lo pair select 64-bit elements from the corresponding 128-bit // subvectors of x and y. 
// -// For example {4,5,8,9}.concatSelectedConstant(0b_11_10, {6,7,10,11}) +// For example {4,5,8,9}.concatSelectedConstantGrouped(0b_11_10, {6,7,10,11}) // returns {4,7,9,11}; bit 0 is zero, selecting element 0 from x's least // 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), // then 1, selecting element 1 from x's upper 128 bits (9), then 1, @@ -7617,13 +7625,15 @@ func (x Int64x4) concatSelectedConstantGrouped(hilos uint8, y Int64x4) Int64x4 // hi and lo pair select 64-bit elements from the corresponding 128-bit // subvectors of x and y. // -// For example {4,5,8,9}.concatSelectedConstant(0b_11_10, {6,7,10,11}) -// returns {4,7,9,11}; bit 0 is zero, selecting element 0 from x's least -// 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), -// then 1, selecting element 1 from x's upper 128 bits (9), then 1, -// selecting element 1 from y's upper 128 bits (11). -// This differs from the same method applied to a 32x8 vector, where -// the 8-bit constant performs the same selection on both subvectors. +// For example {4,5,8,9,12,13,16,17}.concatSelectedConstantGrouped(0b11_00_11_10, {6,7,10,11,14,15,18,19}) +// returns {4,7,9,11,12,14,17,19}; bit 0 is zero, selecting element 0 from x's +// least 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), +// then 1, selecting element 1 from x's next 128 bits (9), then 1, +// selecting element 1 from y's next 128 bits (11). The next two 0 bits select +// the lower elements from x and y's 3rd 128 bit groups (12, 14), the last two +// 1 bits select the upper elements from x and y's last 128 bits (17, 19). +// This differs from the same method applied to a 32x8 or 32x16 vector, where +// the 8-bit constant performs the same selection on all the subvectors. // // hilos results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// @@ -7633,9 +7643,9 @@ func (x Int64x8) concatSelectedConstantGrouped(hilos uint8, y Int64x8) Int64x8 // concatSelectedConstantGrouped concatenates selected elements from 128-bit subvectors of x and y // into the lower and upper halves of corresponding subvectors of the output. // The selection is chosen by the constant parameter h1h0l1l0 -// where each {h,l}{1,0} is two bits specify which element from y or x to select. +// where each {h,l}{1,0} is two bits specifying which element from y or x to select. // For example, -// {0,1,2,3,8,9,10,11}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) +// {0,1,2,3,8,9,10,11}.concatSelectedConstantGrouped(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) // returns {2,0,5,7,10,8,13,15} // (don't forget that the binary constant is written big-endian). // @@ -7647,10 +7657,13 @@ func (x Uint32x8) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Uint32x8) Uint // concatSelectedConstantGrouped concatenates selected elements from 128-bit subvectors of x and y // into the lower and upper halves of corresponding subvectors of the output. // The selection is chosen by the constant parameter h1h0l1l0 -// where each {h,l}{1,0} is two bits specify which element from y or x to select. +// where each {h,l}{1,0} is two bits specifying which element from y or x to select. // For example, -// {0,1,2,3,8,9,10,11}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7,12,13,14,15}) -// returns {2,0,5,7,10,8,13,15} +// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped( +// +// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215}) +// +// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215} // (don't forget that the binary constant is written big-endian). // // h1h0l1l0 results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
@@ -7664,7 +7677,7 @@ func (x Uint32x16) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Uint32x16) Ui // hi and lo pair select 64-bit elements from the corresponding 128-bit // subvectors of x and y. // -// For example {4,5,8,9}.concatSelectedConstant(0b_11_10, {6,7,10,11}) +// For example {4,5,8,9}.concatSelectedConstantGrouped(0b_11_10, {6,7,10,11}) // returns {4,7,9,11}; bit 0 is zero, selecting element 0 from x's least // 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), // then 1, selecting element 1 from x's upper 128 bits (9), then 1, @@ -7683,13 +7696,15 @@ func (x Uint64x4) concatSelectedConstantGrouped(hilos uint8, y Uint64x4) Uint64x // hi and lo pair select 64-bit elements from the corresponding 128-bit // subvectors of x and y. // -// For example {4,5,8,9}.concatSelectedConstant(0b_11_10, {6,7,10,11}) -// returns {4,7,9,11}; bit 0 is zero, selecting element 0 from x's least -// 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), -// then 1, selecting element 1 from x's upper 128 bits (9), then 1, -// selecting element 1 from y's upper 128 bits (11). -// This differs from the same method applied to a 32x8 vector, where -// the 8-bit constant performs the same selection on both subvectors. +// For example {4,5,8,9,12,13,16,17}.concatSelectedConstantGrouped(0b11_00_11_10, {6,7,10,11,14,15,18,19}) +// returns {4,7,9,11,12,14,17,19}; bit 0 is zero, selecting element 0 from x's +// least 128-bits (4), then 1, selects the element 1 from y's least 128-bits (7), +// then 1, selecting element 1 from x's next 128 bits (9), then 1, +// selecting element 1 from y's next 128 bits (11). The next two 0 bits select +// the lower elements from x and y's 3rd 128 bit groups (12, 14), the last two +// 1 bits select the upper elements from x and y's last 128 bits (17, 19). +// This differs from the same method applied to a 32x8 or 32x16 vector, where +// the 8-bit constant performs the same selection on all the subvectors. 
// // hilos results in better performance when it's a constant, a non-constant value will be translated into a jump table. //