diff --git a/crates/core_arch/src/x86/avx512vbmi2.rs b/crates/core_arch/src/x86/avx512vbmi2.rs index da30fd96db..b51720982d 100644 --- a/crates/core_arch/src/x86/avx512vbmi2.rs +++ b/crates/core_arch/src/x86/avx512vbmi2.rs @@ -887,7 +887,7 @@ pub unsafe fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m1 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] pub unsafe fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - transmute(vpshrdvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) + transmute(vpshrdvq(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -923,7 +923,7 @@ pub unsafe fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] pub unsafe fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - transmute(vpshrdvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) + transmute(vpshrdvq256(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -959,7 +959,7 @@ pub unsafe fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] pub unsafe fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - transmute(vpshrdvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) + transmute(vpshrdvq128(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -995,7 +995,7 @@ pub unsafe fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m1 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] pub unsafe fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - transmute(vpshrdvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) + transmute(vpshrdvd(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -1036,7 +1036,7 @@ pub unsafe fn _mm512_maskz_shrdv_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] pub unsafe fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - transmute(vpshrdvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) + transmute(vpshrdvd256(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1072,7 +1072,7 @@ pub unsafe fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] pub unsafe fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - transmute(vpshrdvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) + transmute(vpshrdvd128(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1108,7 +1108,7 @@ pub unsafe fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m1 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] pub unsafe fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - transmute(vpshrdvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) + transmute(vpshrdvw(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1149,7 +1149,7 @@ pub unsafe fn _mm512_maskz_shrdv_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] pub unsafe fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - transmute(vpshrdvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) + transmute(vpshrdvw256(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1190,7 +1190,7 @@ pub unsafe fn _mm256_maskz_shrdv_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] pub unsafe fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - transmute(vpshrdvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) + transmute(vpshrdvw128(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
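// Editorial sketch, not part of the patch: a scalar model of the per-element
// operation the vpshrdv* intrinsics above are documented to perform
// ("concatenate b and a, shift the result right by c, keep the low half").
// The function name and the 16-bit lane width are illustrative only; the 32-
// and 64-bit variants follow the same model with wider types.
fn shrd16_model(a: u16, b: u16, c: u16) -> u16 {
    // b supplies the high half of the concatenation, a the low half. The shift
    // count is assumed to be reduced modulo the lane width, as the hardware
    // count operand is.
    let concat = ((b as u32) << 16) | (a as u32);
    (concat >> (c & 15)) as u16
}
// With the updated test operands a = 2, b = 8, c = 1 this gives
// ((8 << 16) | 2) >> 1 = 0x0004_0001, whose low 16 bits are 1, matching the
// expected value in the adjusted tests later in this patch.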
@@ -1228,12 +1228,7 @@ pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m1 #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shldi_epi64(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - transmute(vpshldvq( - a.as_i64x8(), - b.as_i64x8(), - _mm512_set1_epi64(imm8).as_i64x8(), - )) + _mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64)) } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1251,12 +1246,7 @@ pub unsafe fn _mm512_mask_shldi_epi64( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x8 = vpshldvq( - a.as_i64x8(), - b.as_i64x8(), - _mm512_set1_epi64(imm8).as_i64x8(), - ); + let shf = _mm512_shldi_epi64::(a, b).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1274,12 +1264,7 @@ pub unsafe fn _mm512_maskz_shldi_epi64( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x8 = vpshldvq( - a.as_i64x8(), - b.as_i64x8(), - _mm512_set1_epi64(imm8).as_i64x8(), - ); + let shf = _mm512_shldi_epi64::(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1294,12 +1279,7 @@ pub unsafe fn _mm512_maskz_shldi_epi64( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_shldi_epi64(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - transmute(vpshldvq256( - a.as_i64x4(), - b.as_i64x4(), - _mm256_set1_epi64x(imm8).as_i64x4(), - )) + _mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64)) } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1317,12 +1297,7 @@ pub unsafe fn _mm256_mask_shldi_epi64( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x4 = vpshldvq256( - a.as_i64x4(), - b.as_i64x4(), - _mm256_set1_epi64x(imm8).as_i64x4(), - ); + let shf = _mm256_shldi_epi64::(a, b).as_i64x4(); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) } @@ -1340,12 +1315,7 @@ pub unsafe fn _mm256_maskz_shldi_epi64( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x4 = vpshldvq256( - a.as_i64x4(), - b.as_i64x4(), - _mm256_set1_epi64x(imm8).as_i64x4(), - ); + let shf = _mm256_shldi_epi64::(a, b).as_i64x4(); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1360,12 +1330,7 @@ pub unsafe fn _mm256_maskz_shldi_epi64( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_shldi_epi64(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - transmute(vpshldvq128( - a.as_i64x2(), - b.as_i64x2(), - _mm_set1_epi64x(imm8).as_i64x2(), - )) + _mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64)) } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
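// Editorial sketch, not part of the patch: the left-shift counterpart of the
// model above, matching the doc comments in this region ("concatenate a and b,
// shift the result left, keep the upper half"). Name and lane width are
// illustrative; the count is assumed to be reduced modulo the lane width.
fn shld16_model(a: u16, b: u16, count: u16) -> u16 {
    // a supplies the high half, b the low half; the upper 16 bits of the
    // shifted 32-bit concatenation become the result lane.
    let concat = ((a as u32) << 16) | (b as u32);
    ((concat << (count & 15)) >> 16) as u16
}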
@@ -1383,8 +1348,7 @@ pub unsafe fn _mm_mask_shldi_epi64( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x2 = vpshldvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); + let shf = _mm_shldi_epi64::(a, b).as_i64x2(); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) } @@ -1402,8 +1366,7 @@ pub unsafe fn _mm_maskz_shldi_epi64( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x2 = vpshldvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); + let shf = _mm_shldi_epi64::(a, b).as_i64x2(); let zero = _mm_setzero_si128().as_i64x2(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1418,11 +1381,7 @@ pub unsafe fn _mm_maskz_shldi_epi64( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shldi_epi32(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - transmute(vpshldvd( - a.as_i32x16(), - b.as_i32x16(), - _mm512_set1_epi32(IMM8).as_i32x16(), - )) + _mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8)) } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1440,11 +1399,7 @@ pub unsafe fn _mm512_mask_shldi_epi32( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x16 = vpshldvd( - a.as_i32x16(), - b.as_i32x16(), - _mm512_set1_epi32(IMM8).as_i32x16(), - ); + let shf = _mm512_shldi_epi32::(a, b).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1462,11 +1417,7 @@ pub unsafe fn _mm512_maskz_shldi_epi32( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x16 = vpshldvd( - a.as_i32x16(), - b.as_i32x16(), - _mm512_set1_epi32(IMM8).as_i32x16(), - ); + let shf = _mm512_shldi_epi32::(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1481,11 +1432,7 @@ pub unsafe fn _mm512_maskz_shldi_epi32( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_shldi_epi32(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - transmute(vpshldvd256( - a.as_i32x8(), - b.as_i32x8(), - _mm256_set1_epi32(IMM8).as_i32x8(), - )) + _mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8)) } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1503,11 +1450,7 @@ pub unsafe fn _mm256_mask_shldi_epi32( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x8 = vpshldvd256( - a.as_i32x8(), - b.as_i32x8(), - _mm256_set1_epi32(IMM8).as_i32x8(), - ); + let shf = _mm256_shldi_epi32::(a, b).as_i32x8(); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) } @@ -1525,11 +1468,7 @@ pub unsafe fn _mm256_maskz_shldi_epi32( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x8 = vpshldvd256( - a.as_i32x8(), - b.as_i32x8(), - _mm256_set1_epi32(IMM8).as_i32x8(), - ); + let shf = _mm256_shldi_epi32::(a, b).as_i32x8(); let zero = _mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1544,11 +1483,7 @@ pub unsafe fn _mm256_maskz_shldi_epi32( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_shldi_epi32(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - transmute(vpshldvd128( - a.as_i32x4(), - b.as_i32x4(), - _mm_set1_epi32(IMM8).as_i32x4(), - )) + _mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8)) } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1566,7 +1501,7 @@ pub unsafe fn _mm_mask_shldi_epi32( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); + let shf = _mm_shldi_epi32::(a, b).as_i32x4(); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) } @@ -1584,7 +1519,7 @@ pub unsafe fn _mm_maskz_shldi_epi32( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); + let shf = _mm_shldi_epi32::(a, b).as_i32x4(); let zero = _mm_setzero_si128().as_i32x4(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1599,12 +1534,7 @@ pub unsafe fn _mm_maskz_shldi_epi32( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shldi_epi16(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - transmute(vpshldvw( - a.as_i16x32(), - b.as_i16x32(), - _mm512_set1_epi16(imm8).as_i16x32(), - )) + _mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16)) } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
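// Editorial sketch, not part of the patch: after this refactor the
// fixed-immediate forms are thin wrappers over the variable-count forms with a
// broadcast count, so the two calls below should produce identical results.
// Assumes core::arch::x86_64::* is in scope (or, from outside the crate, the
// unstable stdarch_x86_avx512 feature); the function name and the count 3 are
// illustrative only.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
unsafe fn shldi_equivalence_demo(a: __m256i, b: __m256i) -> (__m256i, __m256i) {
    let by_immediate = _mm256_shldi_epi32::<3>(a, b);
    let by_variable = _mm256_shldv_epi32(a, b, _mm256_set1_epi32(3));
    (by_immediate, by_variable)
}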
@@ -1622,12 +1552,7 @@ pub unsafe fn _mm512_mask_shldi_epi16( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - let shf: i16x32 = vpshldvw( - a.as_i16x32(), - b.as_i16x32(), - _mm512_set1_epi16(imm8).as_i16x32(), - ); + let shf = _mm512_shldi_epi16::(a, b).as_i16x32(); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) } @@ -1645,12 +1570,7 @@ pub unsafe fn _mm512_maskz_shldi_epi16( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - let shf: i16x32 = vpshldvw( - a.as_i16x32(), - b.as_i16x32(), - _mm512_set1_epi16(imm8).as_i16x32(), - ); + let shf = _mm512_shldi_epi16::(a, b).as_i16x32(); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1665,12 +1585,7 @@ pub unsafe fn _mm512_maskz_shldi_epi16( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_shldi_epi16(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - transmute(vpshldvw256( - a.as_i16x16(), - b.as_i16x16(), - _mm256_set1_epi16(imm8).as_i16x16(), - )) + _mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16)) } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1688,12 +1603,7 @@ pub unsafe fn _mm256_mask_shldi_epi16( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - let shf: i16x16 = vpshldvw256( - a.as_i16x16(), - b.as_i16x16(), - _mm256_set1_epi16(imm8).as_i16x16(), - ); + let shf = _mm256_shldi_epi16::(a, b).as_i16x16(); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) } @@ -1711,12 +1621,7 @@ pub unsafe fn _mm256_maskz_shldi_epi16( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - let shf: i16x16 = vpshldvw256( - a.as_i16x16(), - b.as_i16x16(), - _mm256_set1_epi16(imm8).as_i16x16(), - ); + let shf = _mm256_shldi_epi16::(a, b).as_i16x16(); let zero = _mm256_setzero_si256().as_i16x16(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1731,12 +1636,7 @@ pub unsafe fn _mm256_maskz_shldi_epi16( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_shldi_epi16(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - transmute(vpshldvw128( - a.as_i16x8(), - b.as_i16x8(), - _mm_set1_epi16(imm8).as_i16x8(), - )) + _mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16)) } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1754,8 +1654,7 @@ pub unsafe fn _mm_mask_shldi_epi16( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - let shf: i16x8 = vpshldvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); + let shf = _mm_shldi_epi16::(a, b).as_i16x8(); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) } @@ -1773,8 +1672,7 @@ pub unsafe fn _mm_maskz_shldi_epi16( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - let shf: i16x8 = vpshldvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); + let shf = _mm_shldi_epi16::(a, b).as_i16x8(); let zero = _mm_setzero_si128().as_i16x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1789,12 +1687,7 @@ pub unsafe fn _mm_maskz_shldi_epi16( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shrdi_epi64(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - transmute(vpshrdvq( - a.as_i64x8(), - b.as_i64x8(), - _mm512_set1_epi64(imm8).as_i64x8(), - )) + _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64)) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set). @@ -1812,12 +1705,7 @@ pub unsafe fn _mm512_mask_shrdi_epi64( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x8 = vpshrdvq( - a.as_i64x8(), - b.as_i64x8(), - _mm512_set1_epi64(imm8).as_i64x8(), - ); + let shf = _mm512_shrdi_epi64::(a, b).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1835,12 +1723,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi64( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x8 = vpshrdvq( - a.as_i64x8(), - b.as_i64x8(), - _mm512_set1_epi64(imm8).as_i64x8(), - ); + let shf = _mm512_shrdi_epi64::(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1855,12 +1738,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi64( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_shrdi_epi64(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - transmute(vpshrdvq256( - a.as_i64x4(), - b.as_i64x4(), - _mm256_set1_epi64x(imm8).as_i64x4(), - )) + _mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64)) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set). 
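// Editorial sketch, not part of the patch: what the simd_select_bitmask calls
// in the mask/maskz wrappers compute per lane, written as a scalar model with
// illustrative helper names.
fn mask_merge_model(k_bit: bool, shifted: i32, src: i32) -> i32 {
    // merge-masking (_mm*_mask_*): keep the shifted lane where the mask bit is
    // set, otherwise copy the lane from src.
    if k_bit { shifted } else { src }
}
fn mask_zero_model(k_bit: bool, shifted: i32) -> i32 {
    // zero-masking (_mm*_maskz_*): keep the shifted lane where the mask bit is
    // set, otherwise zero the lane.
    if k_bit { shifted } else { 0 }
}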
@@ -1878,12 +1756,7 @@ pub unsafe fn _mm256_mask_shrdi_epi64( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x4 = vpshrdvq256( - a.as_i64x4(), - b.as_i64x4(), - _mm256_set1_epi64x(imm8).as_i64x4(), - ); + let shf = _mm256_shrdi_epi64::(a, b).as_i64x4(); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) } @@ -1901,12 +1774,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi64( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x4 = vpshrdvq256( - a.as_i64x4(), - b.as_i64x4(), - _mm256_set1_epi64x(imm8).as_i64x4(), - ); + let shf = _mm256_shrdi_epi64::(a, b).as_i64x4(); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1921,12 +1789,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi64( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_shrdi_epi64(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - transmute(vpshrdvq128( - a.as_i64x2(), - b.as_i64x2(), - _mm_set1_epi64x(imm8).as_i64x2(), - )) + _mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64)) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src" when the corresponding mask bit is not set). @@ -1944,8 +1807,7 @@ pub unsafe fn _mm_mask_shrdi_epi64( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x2 = vpshrdvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); + let shf = _mm_shrdi_epi64::(a, b).as_i64x2(); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) } @@ -1963,8 +1825,7 @@ pub unsafe fn _mm_maskz_shrdi_epi64( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i64; - let shf: i64x2 = vpshrdvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); + let shf = _mm_shrdi_epi64::(a, b).as_i64x2(); let zero = _mm_setzero_si128().as_i64x2(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1979,11 +1840,7 @@ pub unsafe fn _mm_maskz_shrdi_epi64( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shrdi_epi32(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - transmute(vpshrdvd( - a.as_i32x16(), - b.as_i32x16(), - _mm512_set1_epi32(IMM8).as_i32x16(), - )) + _mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8)) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2001,11 +1858,7 @@ pub unsafe fn _mm512_mask_shrdi_epi32( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x16 = vpshrdvd( - a.as_i32x16(), - b.as_i32x16(), - _mm512_set1_epi32(IMM8).as_i32x16(), - ); + let shf = _mm512_shrdi_epi32::(a, b).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -2023,11 +1876,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi32( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x16 = vpshrdvd( - a.as_i32x16(), - b.as_i32x16(), - _mm512_set1_epi32(IMM8).as_i32x16(), - ); + let shf = _mm512_shrdi_epi32::(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -2042,11 +1891,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi32( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_shrdi_epi32(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - transmute(vpshrdvd256( - a.as_i32x8(), - b.as_i32x8(), - _mm256_set1_epi32(IMM8).as_i32x8(), - )) + _mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8)) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2064,11 +1909,7 @@ pub unsafe fn _mm256_mask_shrdi_epi32( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x8 = vpshrdvd256( - a.as_i32x8(), - b.as_i32x8(), - _mm256_set1_epi32(IMM8).as_i32x8(), - ); + let shf = _mm256_shrdi_epi32::(a, b).as_i32x8(); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) } @@ -2086,11 +1927,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi32( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x8 = vpshrdvd256( - a.as_i32x8(), - b.as_i32x8(), - _mm256_set1_epi32(IMM8).as_i32x8(), - ); + let shf = _mm256_shrdi_epi32::(a, b).as_i32x8(); let zero = _mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -2105,11 +1942,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi32( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_shrdi_epi32(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - transmute(vpshrdvd128( - a.as_i32x4(), - b.as_i32x4(), - _mm_set1_epi32(IMM8).as_i32x4(), - )) + _mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8)) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2127,7 +1960,7 @@ pub unsafe fn _mm_mask_shrdi_epi32( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); + let shf = _mm_shrdi_epi32::(a, b).as_i32x4(); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) } @@ -2145,7 +1978,7 @@ pub unsafe fn _mm_maskz_shrdi_epi32( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); + let shf = _mm_shrdi_epi32::(a, b).as_i32x4(); let zero = _mm_setzero_si128().as_i32x4(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -2160,13 +1993,7 @@ pub unsafe fn _mm_maskz_shrdi_epi32( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shrdi_epi16(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - assert!(matches!(imm8, 0..=255)); - transmute(vpshrdvw( - a.as_i16x32(), - b.as_i16x32(), - _mm512_set1_epi16(imm8).as_i16x32(), - )) + _mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16)) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2184,13 +2011,7 @@ pub unsafe fn _mm512_mask_shrdi_epi16( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - assert!(matches!(imm8, 0..=255)); - let shf: i16x32 = vpshrdvw( - a.as_i16x32(), - b.as_i16x32(), - _mm512_set1_epi16(imm8).as_i16x32(), - ); + let shf = _mm512_shrdi_epi16::(a, b).as_i16x32(); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) } @@ -2208,13 +2029,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi16( b: __m512i, ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - assert!(matches!(imm8, 0..=255)); - let shf: i16x32 = vpshrdvw( - a.as_i16x32(), - b.as_i16x32(), - _mm512_set1_epi16(imm8).as_i16x32(), - ); + let shf = _mm512_shrdi_epi16::(a, b).as_i16x32(); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -2229,13 +2044,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi16( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_shrdi_epi16(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - assert!(matches!(imm8, 0..=255)); - transmute(vpshrdvw256( - a.as_i16x16(), - b.as_i16x16(), - _mm256_set1_epi16(imm8).as_i16x16(), - )) + _mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16)) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2253,13 +2062,7 @@ pub unsafe fn _mm256_mask_shrdi_epi16( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - assert!(matches!(imm8, 0..=255)); - let shf: i16x16 = vpshrdvw256( - a.as_i16x16(), - b.as_i16x16(), - _mm256_set1_epi16(imm8).as_i16x16(), - ); + let shf = _mm256_shrdi_epi16::(a, b).as_i16x16(); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) } @@ -2277,12 +2080,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi16( b: __m256i, ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - let shf: i16x16 = vpshrdvw256( - a.as_i16x16(), - b.as_i16x16(), - _mm256_set1_epi16(imm8).as_i16x16(), - ); + let shf = _mm256_shrdi_epi16::(a, b).as_i16x16(); let zero = _mm256_setzero_si256().as_i16x16(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -2297,12 +2095,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi16( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_shrdi_epi16(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - transmute(vpshrdvw128( - a.as_i16x8(), - b.as_i16x8(), - _mm_set1_epi16(imm8).as_i16x8(), - )) + _mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16)) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2320,8 +2113,7 @@ pub unsafe fn _mm_mask_shrdi_epi16( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - let shf: i16x8 = vpshrdvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); + let shf = _mm_shrdi_epi16::(a, b).as_i16x8(); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) } @@ -2339,8 +2131,7 @@ pub unsafe fn _mm_maskz_shrdi_epi16( b: __m128i, ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let imm8 = IMM8 as i16; - let shf: i16x8 = vpshrdvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); + let shf = _mm_shrdi_epi16::(a, b).as_i16x8(); let zero = _mm_setzero_si128().as_i16x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -3069,8 +2860,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_shrdv_epi64() { - let a = _mm512_set1_epi64(8); - let b = _mm512_set1_epi64(2); + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); let c = _mm512_set1_epi64(1); let r = _mm512_shrdv_epi64(a, b, c); let e = _mm512_set1_epi64(1); @@ -3079,8 +2870,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_mask_shrdv_epi64() { - let a = _mm512_set1_epi64(8); - let b = _mm512_set1_epi64(2); + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); let c = _mm512_set1_epi64(1); let r = _mm512_mask_shrdv_epi64(a, 0, b, c); assert_eq_m512i(r, a); @@ -3091,8 +2882,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_maskz_shrdv_epi64() { - let a = _mm512_set1_epi64(8); - let b = _mm512_set1_epi64(2); + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); let c = _mm512_set1_epi64(1); let r = _mm512_maskz_shrdv_epi64(0, a, b, c); assert_eq_m512i(r, _mm512_setzero_si512()); @@ -3103,8 +2894,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_shrdv_epi64() { - let a = _mm256_set1_epi64x(8); - let b = _mm256_set1_epi64x(2); + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); let c = _mm256_set1_epi64x(1); let r = _mm256_shrdv_epi64(a, b, c); let e = _mm256_set1_epi64x(1); @@ 
-3113,8 +2904,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_mask_shrdv_epi64() { - let a = _mm256_set1_epi64x(8); - let b = _mm256_set1_epi64x(2); + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); let c = _mm256_set1_epi64x(1); let r = _mm256_mask_shrdv_epi64(a, 0, b, c); assert_eq_m256i(r, a); @@ -3125,8 +2916,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_maskz_shrdv_epi64() { - let a = _mm256_set1_epi64x(8); - let b = _mm256_set1_epi64x(2); + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); let c = _mm256_set1_epi64x(1); let r = _mm256_maskz_shrdv_epi64(0, a, b, c); assert_eq_m256i(r, _mm256_setzero_si256()); @@ -3137,8 +2928,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_shrdv_epi64() { - let a = _mm_set1_epi64x(8); - let b = _mm_set1_epi64x(2); + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); let c = _mm_set1_epi64x(1); let r = _mm_shrdv_epi64(a, b, c); let e = _mm_set1_epi64x(1); @@ -3147,8 +2938,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_mask_shrdv_epi64() { - let a = _mm_set1_epi64x(8); - let b = _mm_set1_epi64x(2); + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); let c = _mm_set1_epi64x(1); let r = _mm_mask_shrdv_epi64(a, 0, b, c); assert_eq_m128i(r, a); @@ -3159,8 +2950,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_maskz_shrdv_epi64() { - let a = _mm_set1_epi64x(8); - let b = _mm_set1_epi64x(2); + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); let c = _mm_set1_epi64x(1); let r = _mm_maskz_shrdv_epi64(0, a, b, c); assert_eq_m128i(r, _mm_setzero_si128()); @@ -3171,8 +2962,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_shrdv_epi32() { - let a = _mm512_set1_epi32(8); - let b = _mm512_set1_epi32(2); + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); let c = _mm512_set1_epi32(1); let r = _mm512_shrdv_epi32(a, b, c); let e = _mm512_set1_epi32(1); @@ -3181,8 +2972,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_mask_shrdv_epi32() { - let a = _mm512_set1_epi32(8); - let b = _mm512_set1_epi32(2); + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); let c = _mm512_set1_epi32(1); let r = _mm512_mask_shrdv_epi32(a, 0, b, c); assert_eq_m512i(r, a); @@ -3193,8 +2984,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_maskz_shrdv_epi32() { - let a = _mm512_set1_epi32(8); - let b = _mm512_set1_epi32(2); + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); let c = _mm512_set1_epi32(1); let r = _mm512_maskz_shrdv_epi32(0, a, b, c); assert_eq_m512i(r, _mm512_setzero_si512()); @@ -3205,8 +2996,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_shrdv_epi32() { - let a = _mm256_set1_epi32(8); - let b = _mm256_set1_epi32(2); + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); let c = _mm256_set1_epi32(1); let r = _mm256_shrdv_epi32(a, b, c); let e = _mm256_set1_epi32(1); @@ -3215,8 +3006,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_mask_shrdv_epi32() { - let a = _mm256_set1_epi32(8); - let b = _mm256_set1_epi32(2); + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); let c = _mm256_set1_epi32(1); let r = _mm256_mask_shrdv_epi32(a, 0, b, c); assert_eq_m256i(r, a); @@ -3227,8 +3018,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn 
test_mm256_maskz_shrdv_epi32() { - let a = _mm256_set1_epi32(8); - let b = _mm256_set1_epi32(2); + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); let c = _mm256_set1_epi32(1); let r = _mm256_maskz_shrdv_epi32(0, a, b, c); assert_eq_m256i(r, _mm256_setzero_si256()); @@ -3239,8 +3030,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_shrdv_epi32() { - let a = _mm_set1_epi32(8); - let b = _mm_set1_epi32(2); + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); let c = _mm_set1_epi32(1); let r = _mm_shrdv_epi32(a, b, c); let e = _mm_set1_epi32(1); @@ -3249,8 +3040,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_mask_shrdv_epi32() { - let a = _mm_set1_epi32(8); - let b = _mm_set1_epi32(2); + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); let c = _mm_set1_epi32(1); let r = _mm_mask_shrdv_epi32(a, 0, b, c); assert_eq_m128i(r, a); @@ -3261,8 +3052,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_maskz_shrdv_epi32() { - let a = _mm_set1_epi32(8); - let b = _mm_set1_epi32(2); + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); let c = _mm_set1_epi32(1); let r = _mm_maskz_shrdv_epi32(0, a, b, c); assert_eq_m128i(r, _mm_setzero_si128()); @@ -3273,8 +3064,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_shrdv_epi16() { - let a = _mm512_set1_epi16(8); - let b = _mm512_set1_epi16(2); + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); let c = _mm512_set1_epi16(1); let r = _mm512_shrdv_epi16(a, b, c); let e = _mm512_set1_epi16(1); @@ -3283,8 +3074,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_mask_shrdv_epi16() { - let a = _mm512_set1_epi16(8); - let b = _mm512_set1_epi16(2); + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); let c = _mm512_set1_epi16(1); let r = _mm512_mask_shrdv_epi16(a, 0, b, c); assert_eq_m512i(r, a); @@ -3295,8 +3086,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_maskz_shrdv_epi16() { - let a = _mm512_set1_epi16(8); - let b = _mm512_set1_epi16(2); + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); let c = _mm512_set1_epi16(1); let r = _mm512_maskz_shrdv_epi16(0, a, b, c); assert_eq_m512i(r, _mm512_setzero_si512()); @@ -3307,8 +3098,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_shrdv_epi16() { - let a = _mm256_set1_epi16(8); - let b = _mm256_set1_epi16(2); + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); let c = _mm256_set1_epi16(1); let r = _mm256_shrdv_epi16(a, b, c); let e = _mm256_set1_epi16(1); @@ -3317,8 +3108,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_mask_shrdv_epi16() { - let a = _mm256_set1_epi16(8); - let b = _mm256_set1_epi16(2); + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); let c = _mm256_set1_epi16(1); let r = _mm256_mask_shrdv_epi16(a, 0, b, c); assert_eq_m256i(r, a); @@ -3329,8 +3120,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_maskz_shrdv_epi16() { - let a = _mm256_set1_epi16(8); - let b = _mm256_set1_epi16(2); + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); let c = _mm256_set1_epi16(1); let r = _mm256_maskz_shrdv_epi16(0, a, b, c); assert_eq_m256i(r, _mm256_setzero_si256()); @@ -3341,8 +3132,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_shrdv_epi16() { - let a = _mm_set1_epi16(8); - let b = _mm_set1_epi16(2); + let a = 
_mm_set1_epi16(2); + let b = _mm_set1_epi16(8); let c = _mm_set1_epi16(1); let r = _mm_shrdv_epi16(a, b, c); let e = _mm_set1_epi16(1); @@ -3351,8 +3142,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_mask_shrdv_epi16() { - let a = _mm_set1_epi16(8); - let b = _mm_set1_epi16(2); + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); let c = _mm_set1_epi16(1); let r = _mm_mask_shrdv_epi16(a, 0, b, c); assert_eq_m128i(r, a); @@ -3363,8 +3154,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_maskz_shrdv_epi16() { - let a = _mm_set1_epi16(8); - let b = _mm_set1_epi16(2); + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); let c = _mm_set1_epi16(1); let r = _mm_maskz_shrdv_epi16(0, a, b, c); assert_eq_m128i(r, _mm_setzero_si128()); @@ -3654,8 +3445,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_shrdi_epi64() { - let a = _mm512_set1_epi64(8); - let b = _mm512_set1_epi64(2); + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); let r = _mm512_shrdi_epi64::<1>(a, b); let e = _mm512_set1_epi64(1); assert_eq_m512i(r, e); @@ -3663,8 +3454,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_mask_shrdi_epi64() { - let a = _mm512_set1_epi64(8); - let b = _mm512_set1_epi64(2); + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b); @@ -3674,8 +3465,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_maskz_shrdi_epi64() { - let a = _mm512_set1_epi64(8); - let b = _mm512_set1_epi64(2); + let a = _mm512_set1_epi64(2); + let b = _mm512_set1_epi64(8); let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b); @@ -3685,8 +3476,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_shrdi_epi64() { - let a = _mm256_set1_epi64x(8); - let b = _mm256_set1_epi64x(2); + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); let r = _mm256_shrdi_epi64::<1>(a, b); let e = _mm256_set1_epi64x(1); assert_eq_m256i(r, e); @@ -3694,8 +3485,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_mask_shrdi_epi64() { - let a = _mm256_set1_epi64x(8); - let b = _mm256_set1_epi64x(2); + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b); assert_eq_m256i(r, a); let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b); @@ -3705,8 +3496,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_maskz_shrdi_epi64() { - let a = _mm256_set1_epi64x(8); - let b = _mm256_set1_epi64x(2); + let a = _mm256_set1_epi64x(2); + let b = _mm256_set1_epi64x(8); let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b); @@ -3716,8 +3507,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_shrdi_epi64() { - let a = _mm_set1_epi64x(8); - let b = _mm_set1_epi64x(2); + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); let r = _mm_shrdi_epi64::<1>(a, b); let e = _mm_set1_epi64x(1); assert_eq_m128i(r, e); @@ -3725,8 +3516,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_mask_shrdi_epi64() { - let a = _mm_set1_epi64x(8); - let b = _mm_set1_epi64x(2); + let a = 
_mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b); assert_eq_m128i(r, a); let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b); @@ -3736,8 +3527,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_maskz_shrdi_epi64() { - let a = _mm_set1_epi64x(8); - let b = _mm_set1_epi64x(2); + let a = _mm_set1_epi64x(2); + let b = _mm_set1_epi64x(8); let r = _mm_maskz_shrdi_epi64::<1>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b); @@ -3747,8 +3538,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_shrdi_epi32() { - let a = _mm512_set1_epi32(8); - let b = _mm512_set1_epi32(2); + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); let r = _mm512_shrdi_epi32::<1>(a, b); let e = _mm512_set1_epi32(1); assert_eq_m512i(r, e); @@ -3756,8 +3547,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_mask_shrdi_epi32() { - let a = _mm512_set1_epi32(8); - let b = _mm512_set1_epi32(2); + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b); @@ -3767,8 +3558,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_maskz_shrdi_epi32() { - let a = _mm512_set1_epi32(8); - let b = _mm512_set1_epi32(2); + let a = _mm512_set1_epi32(2); + let b = _mm512_set1_epi32(8); let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b); @@ -3778,8 +3569,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_shrdi_epi32() { - let a = _mm256_set1_epi32(8); - let b = _mm256_set1_epi32(2); + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); let r = _mm256_shrdi_epi32::<1>(a, b); let e = _mm256_set1_epi32(1); assert_eq_m256i(r, e); @@ -3787,8 +3578,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_mask_shrdi_epi32() { - let a = _mm256_set1_epi32(8); - let b = _mm256_set1_epi32(2); + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b); assert_eq_m256i(r, a); let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b); @@ -3798,8 +3589,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_maskz_shrdi_epi32() { - let a = _mm256_set1_epi32(8); - let b = _mm256_set1_epi32(2); + let a = _mm256_set1_epi32(2); + let b = _mm256_set1_epi32(8); let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b); @@ -3809,8 +3600,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_shrdi_epi32() { - let a = _mm_set1_epi32(8); - let b = _mm_set1_epi32(2); + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); let r = _mm_shrdi_epi32::<1>(a, b); let e = _mm_set1_epi32(1); assert_eq_m128i(r, e); @@ -3818,8 +3609,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_mask_shrdi_epi32() { - let a = _mm_set1_epi32(8); - let b = _mm_set1_epi32(2); + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b); assert_eq_m128i(r, a); let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b); @@ -3829,8 +3620,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn 
test_mm_maskz_shrdi_epi32() { - let a = _mm_set1_epi32(8); - let b = _mm_set1_epi32(2); + let a = _mm_set1_epi32(2); + let b = _mm_set1_epi32(8); let r = _mm_maskz_shrdi_epi32::<1>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b); @@ -3840,8 +3631,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_shrdi_epi16() { - let a = _mm512_set1_epi16(8); - let b = _mm512_set1_epi16(2); + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); let r = _mm512_shrdi_epi16::<1>(a, b); let e = _mm512_set1_epi16(1); assert_eq_m512i(r, e); @@ -3849,8 +3640,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_mask_shrdi_epi16() { - let a = _mm512_set1_epi16(8); - let b = _mm512_set1_epi16(2); + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b); @@ -3860,8 +3651,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2")] unsafe fn test_mm512_maskz_shrdi_epi16() { - let a = _mm512_set1_epi16(8); - let b = _mm512_set1_epi16(2); + let a = _mm512_set1_epi16(2); + let b = _mm512_set1_epi16(8); let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b); @@ -3871,8 +3662,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_shrdi_epi16() { - let a = _mm256_set1_epi16(8); - let b = _mm256_set1_epi16(2); + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); let r = _mm256_shrdi_epi16::<1>(a, b); let e = _mm256_set1_epi16(1); assert_eq_m256i(r, e); @@ -3880,8 +3671,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_mask_shrdi_epi16() { - let a = _mm256_set1_epi16(8); - let b = _mm256_set1_epi16(2); + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b); assert_eq_m256i(r, a); let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b); @@ -3891,8 +3682,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm256_maskz_shrdi_epi16() { - let a = _mm256_set1_epi16(8); - let b = _mm256_set1_epi16(2); + let a = _mm256_set1_epi16(2); + let b = _mm256_set1_epi16(8); let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b); @@ -3902,8 +3693,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_shrdi_epi16() { - let a = _mm_set1_epi16(8); - let b = _mm_set1_epi16(2); + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); let r = _mm_shrdi_epi16::<1>(a, b); let e = _mm_set1_epi16(1); assert_eq_m128i(r, e); @@ -3911,8 +3702,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_mask_shrdi_epi16() { - let a = _mm_set1_epi16(8); - let b = _mm_set1_epi16(2); + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b); assert_eq_m128i(r, a); let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b); @@ -3922,8 +3713,8 @@ mod tests { #[simd_test(enable = "avx512vbmi2,avx512vl")] unsafe fn test_mm_maskz_shrdi_epi16() { - let a = _mm_set1_epi16(8); - let b = _mm_set1_epi16(2); + let a = _mm_set1_epi16(2); + let b = _mm_set1_epi16(8); let r = _mm_maskz_shrdi_epi16::<1>(0, a, b); assert_eq_m128i(r, 
_mm_setzero_si128()); let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b);