Skip to content

Commit 0df4f42

Browse files
committed
Fix expected instructions for AVX 512.
1 parent d780293 commit 0df4f42

File tree

2 files changed

+16
-11
lines changed

2 files changed

+16
-11
lines changed

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8013,7 +8013,7 @@ pub unsafe fn _mm_maskz_dbsad_epu8<const IMM8: i32>(
80138013
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movepi16_mask&expand=3873)
80148014
#[inline]
80158015
#[target_feature(enable = "avx512bw")]
8016-
#[cfg_attr(test, assert_instr(mov))] // should be vpmovw2m but msvc does not generate it
8016+
#[cfg_attr(test, assert_instr(vpmovw2m))]
80178017
pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
80188018
let filter = _mm512_set1_epi16(1 << 15);
80198019
let a = _mm512_and_si512(a, filter);
@@ -8025,7 +8025,7 @@ pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
80258025
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi16_mask&expand=3872)
80268026
#[inline]
80278027
#[target_feature(enable = "avx512bw,avx512vl")]
8028-
#[cfg_attr(test, assert_instr(mov))] // should be vpmovw2m but msvc does not generate it
8028+
#[cfg_attr(test, assert_instr(vpmovw2m))]
80298029
pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
80308030
let filter = _mm256_set1_epi16(1 << 15);
80318031
let a = _mm256_and_si256(a, filter);
@@ -8037,7 +8037,7 @@ pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
80378037
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi16_mask&expand=3871)
80388038
#[inline]
80398039
#[target_feature(enable = "avx512bw,avx512vl")]
8040-
#[cfg_attr(test, assert_instr(mov))] // should be vpmovw2m but msvc does not generate it
8040+
#[cfg_attr(test, assert_instr(vpmovw2m))]
80418041
pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
80428042
let filter = _mm_set1_epi16(1 << 15);
80438043
let a = _mm_and_si128(a, filter);
@@ -8049,7 +8049,7 @@ pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
80498049
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movepi8_mask&expand=3883)
80508050
#[inline]
80518051
#[target_feature(enable = "avx512bw")]
8052-
#[cfg_attr(test, assert_instr(mov))] // should be vpmovb2m but msvc does not generate it
8052+
#[cfg_attr(test, assert_instr(vpmovb2m))]
80538053
pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
80548054
let filter = _mm512_set1_epi8(1 << 7);
80558055
let a = _mm512_and_si512(a, filter);
@@ -8061,7 +8061,8 @@ pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
80618061
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi8_mask&expand=3882)
80628062
#[inline]
80638063
#[target_feature(enable = "avx512bw,avx512vl")]
8064-
#[cfg_attr(test, assert_instr(mov))] // should be vpmovb2m but msvc does not generate it
8064+
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m, but it is compiled to vpmovmskb in the test shim because that takes fewer cycles than
8065+
// using vpmovb2m plus converting the mask register to a standard register.
80658066
pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
80668067
let filter = _mm256_set1_epi8(1 << 7);
80678068
let a = _mm256_and_si256(a, filter);
@@ -8073,7 +8074,8 @@ pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
80738074
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi8_mask&expand=3881)
80748075
#[inline]
80758076
#[target_feature(enable = "avx512bw,avx512vl")]
8076-
#[cfg_attr(test, assert_instr(mov))] // should be vpmovb2m but msvc does not generate it
8077+
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m, but it is compiled to vpmovmskb in the test shim because that takes fewer cycles than
8078+
// using vpmovb2m plus converting the mask register to a standard register.
80778079
pub unsafe fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
80788080
let filter = _mm_set1_epi8(1 << 7);
80798081
let a = _mm_and_si128(a, filter);
@@ -8216,8 +8218,9 @@ pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i {
82168218
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask32&expand=3207)
82178219
#[inline]
82188220
#[target_feature(enable = "avx512bw")]
8219-
#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kaddd
8220-
//llvm.x86.avx512.kadd.d
8221+
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(add))]
8222+
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(lea))] // generate normal lea/add code instead of kaddd
8223+
//llvm.x86.avx512.kadd.d
82218224
pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
82228225
transmute(a + b)
82238226
}
@@ -8227,7 +8230,9 @@ pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
82278230
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask64&expand=3208)
82288231
#[inline]
82298232
#[target_feature(enable = "avx512bw")]
8230-
#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kaddq
8233+
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(add))]
8234+
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(lea))] // generate normal lea/add code instead of kaddq
8235+
//llvm.x86.avx512.kadd.q
82318236
pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
82328237
transmute(a + b)
82338238
}

crates/core_arch/src/x86_64/avx512f.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
7575
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_ss&expand=2035)
7676
#[inline]
7777
#[target_feature(enable = "avx512f")]
78-
#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2ss
78+
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
7979
pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
8080
let b = b as f32;
8181
let r = simd_insert(a, 0, b);
@@ -87,7 +87,7 @@ pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
8787
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sd&expand=2034)
8888
#[inline]
8989
#[target_feature(enable = "avx512f")]
90-
#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2sd
90+
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
9191
pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
9292
let b = b as f64;
9393
let r = simd_insert(a, 0, b);

0 commit comments

Comments
 (0)