Skip to content

Commit 37d6e18

Browse files
authored
Fix x86 SIMD byte shift intrinsics (#1168)
1 parent a2e8b9a commit 37d6e18

File tree

3 files changed

+115
-98
lines changed

3 files changed

+115
-98
lines changed

crates/core_arch/src/x86/avx2.rs

+40-32
Original file line numberDiff line numberDiff line change
@@ -2585,44 +2585,52 @@ pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
25852585
#[stable(feature = "simd_x86", since = "1.27.0")]
25862586
pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
25872587
static_assert_imm8!(IMM8);
2588+
// Computes the shuffle index for output byte `i` (0..32) of a per-128-bit-lane
// left byte shift by `shift` bytes.
// NOTE(review): assumes `simd_shuffle32!(zero, a, ..)` indexes the concatenation
// of its operands (0..32 -> `zero`, 32..64 -> `a`) — confirm against the macro.
const fn mask(shift: i32, i: u32) -> u32 {
2589+
let shift = shift as u32 & 0xff; // only the low 8 bits of the immediate are used
2590+
if shift > 15 || i % 16 < shift { // whole lane shifted out, or byte position within its 16-byte lane is below the shift
2591+
0 // index 0 falls in the first shuffle operand (`zero`)
2592+
} else {
2593+
32 + (i - shift) // offset 32 selects from the second operand; `i % 16 >= shift` here, so `i - shift` cannot underflow
2594+
}
2595+
}
25882596
let a = a.as_i8x32();
25892597
let zero = _mm256_setzero_si256().as_i8x32();
25902598
let r: i8x32 = simd_shuffle32!(
25912599
zero,
25922600
a,
25932601
<const IMM8: i32> [
2594-
32 - (IMM8 as u32 & 0xff),
2595-
33 - (IMM8 as u32 & 0xff),
2596-
34 - (IMM8 as u32 & 0xff),
2597-
35 - (IMM8 as u32 & 0xff),
2598-
36 - (IMM8 as u32 & 0xff),
2599-
37 - (IMM8 as u32 & 0xff),
2600-
38 - (IMM8 as u32 & 0xff),
2601-
39 - (IMM8 as u32 & 0xff),
2602-
40 - (IMM8 as u32 & 0xff),
2603-
41 - (IMM8 as u32 & 0xff),
2604-
42 - (IMM8 as u32 & 0xff),
2605-
43 - (IMM8 as u32 & 0xff),
2606-
44 - (IMM8 as u32 & 0xff),
2607-
45 - (IMM8 as u32 & 0xff),
2608-
46 - (IMM8 as u32 & 0xff),
2609-
47 - (IMM8 as u32 & 0xff),
2610-
48 - (IMM8 as u32 & 0xff) - 16,
2611-
49 - (IMM8 as u32 & 0xff) - 16,
2612-
50 - (IMM8 as u32 & 0xff) - 16,
2613-
51 - (IMM8 as u32 & 0xff) - 16,
2614-
52 - (IMM8 as u32 & 0xff) - 16,
2615-
53 - (IMM8 as u32 & 0xff) - 16,
2616-
54 - (IMM8 as u32 & 0xff) - 16,
2617-
55 - (IMM8 as u32 & 0xff) - 16,
2618-
56 - (IMM8 as u32 & 0xff) - 16,
2619-
57 - (IMM8 as u32 & 0xff) - 16,
2620-
58 - (IMM8 as u32 & 0xff) - 16,
2621-
59 - (IMM8 as u32 & 0xff) - 16,
2622-
60 - (IMM8 as u32 & 0xff) - 16,
2623-
61 - (IMM8 as u32 & 0xff) - 16,
2624-
62 - (IMM8 as u32 & 0xff) - 16,
2625-
63 - (IMM8 as u32 & 0xff) - 16,
2602+
mask(IMM8, 0),
2603+
mask(IMM8, 1),
2604+
mask(IMM8, 2),
2605+
mask(IMM8, 3),
2606+
mask(IMM8, 4),
2607+
mask(IMM8, 5),
2608+
mask(IMM8, 6),
2609+
mask(IMM8, 7),
2610+
mask(IMM8, 8),
2611+
mask(IMM8, 9),
2612+
mask(IMM8, 10),
2613+
mask(IMM8, 11),
2614+
mask(IMM8, 12),
2615+
mask(IMM8, 13),
2616+
mask(IMM8, 14),
2617+
mask(IMM8, 15),
2618+
mask(IMM8, 16),
2619+
mask(IMM8, 17),
2620+
mask(IMM8, 18),
2621+
mask(IMM8, 19),
2622+
mask(IMM8, 20),
2623+
mask(IMM8, 21),
2624+
mask(IMM8, 22),
2625+
mask(IMM8, 23),
2626+
mask(IMM8, 24),
2627+
mask(IMM8, 25),
2628+
mask(IMM8, 26),
2629+
mask(IMM8, 27),
2630+
mask(IMM8, 28),
2631+
mask(IMM8, 29),
2632+
mask(IMM8, 30),
2633+
mask(IMM8, 31),
26262634
],
26272635
);
26282636
transmute(r)

crates/core_arch/src/x86/avx512bw.rs

+72-64
Original file line numberDiff line numberDiff line change
@@ -8873,76 +8873,84 @@ pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
88738873
#[rustc_legacy_const_generics(1)]
88748874
pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
88758875
static_assert_imm8!(IMM8);
8876+
// Computes the shuffle index for output byte `i` (0..64) of a per-128-bit-lane
// left byte shift by `shift` bytes.
// NOTE(review): assumes `simd_shuffle64!(zero, a, ..)` indexes the concatenation
// of its operands (0..64 -> `zero`, 64..128 -> `a`) — confirm against the macro.
const fn mask(shift: i32, i: u32) -> u32 {
8877+
let shift = shift as u32 & 0xff; // only the low 8 bits of the immediate are used
8878+
if shift > 15 || i % 16 < shift { // whole lane shifted out, or byte position within its 16-byte lane is below the shift
8879+
0 // index 0 falls in the first shuffle operand (`zero`)
8880+
} else {
8881+
64 + (i - shift) // offset 64 selects from the second operand; `i % 16 >= shift` here, so `i - shift` cannot underflow
8882+
}
8883+
}
88768884
let a = a.as_i8x64();
88778885
let zero = _mm512_setzero_si512().as_i8x64();
88788886
let r: i8x64 = simd_shuffle64!(
88798887
zero,
88808888
a,
88818889
<const IMM8: i32> [
8882-
64 - (IMM8 as u32 & 0xff),
8883-
65 - (IMM8 as u32 & 0xff),
8884-
66 - (IMM8 as u32 & 0xff),
8885-
67 - (IMM8 as u32 & 0xff),
8886-
68 - (IMM8 as u32 & 0xff),
8887-
69 - (IMM8 as u32 & 0xff),
8888-
70 - (IMM8 as u32 & 0xff),
8889-
71 - (IMM8 as u32 & 0xff),
8890-
72 - (IMM8 as u32 & 0xff),
8891-
73 - (IMM8 as u32 & 0xff),
8892-
74 - (IMM8 as u32 & 0xff),
8893-
75 - (IMM8 as u32 & 0xff),
8894-
76 - (IMM8 as u32 & 0xff),
8895-
77 - (IMM8 as u32 & 0xff),
8896-
78 - (IMM8 as u32 & 0xff),
8897-
79 - (IMM8 as u32 & 0xff),
8898-
80 - (IMM8 as u32 & 0xff) - 16,
8899-
81 - (IMM8 as u32 & 0xff) - 16,
8900-
82 - (IMM8 as u32 & 0xff) - 16,
8901-
83 - (IMM8 as u32 & 0xff) - 16,
8902-
84 - (IMM8 as u32 & 0xff) - 16,
8903-
85 - (IMM8 as u32 & 0xff) - 16,
8904-
86 - (IMM8 as u32 & 0xff) - 16,
8905-
87 - (IMM8 as u32 & 0xff) - 16,
8906-
88 - (IMM8 as u32 & 0xff) - 16,
8907-
89 - (IMM8 as u32 & 0xff) - 16,
8908-
90 - (IMM8 as u32 & 0xff) - 16,
8909-
91 - (IMM8 as u32 & 0xff) - 16,
8910-
92 - (IMM8 as u32 & 0xff) - 16,
8911-
93 - (IMM8 as u32 & 0xff) - 16,
8912-
94 - (IMM8 as u32 & 0xff) - 16,
8913-
95 - (IMM8 as u32 & 0xff) - 16,
8914-
96 - (IMM8 as u32 & 0xff) - 32,
8915-
97 - (IMM8 as u32 & 0xff) - 32,
8916-
98 - (IMM8 as u32 & 0xff) - 32,
8917-
99 - (IMM8 as u32 & 0xff) - 32,
8918-
100 - (IMM8 as u32 & 0xff) - 32,
8919-
101 - (IMM8 as u32 & 0xff) - 32,
8920-
102 - (IMM8 as u32 & 0xff) - 32,
8921-
103 - (IMM8 as u32 & 0xff) - 32,
8922-
104 - (IMM8 as u32 & 0xff) - 32,
8923-
105 - (IMM8 as u32 & 0xff) - 32,
8924-
106 - (IMM8 as u32 & 0xff) - 32,
8925-
107 - (IMM8 as u32 & 0xff) - 32,
8926-
108 - (IMM8 as u32 & 0xff) - 32,
8927-
109 - (IMM8 as u32 & 0xff) - 32,
8928-
110 - (IMM8 as u32 & 0xff) - 32,
8929-
111 - (IMM8 as u32 & 0xff) - 32,
8930-
112 - (IMM8 as u32 & 0xff) - 48,
8931-
113 - (IMM8 as u32 & 0xff) - 48,
8932-
114 - (IMM8 as u32 & 0xff) - 48,
8933-
115 - (IMM8 as u32 & 0xff) - 48,
8934-
116 - (IMM8 as u32 & 0xff) - 48,
8935-
117 - (IMM8 as u32 & 0xff) - 48,
8936-
118 - (IMM8 as u32 & 0xff) - 48,
8937-
119 - (IMM8 as u32 & 0xff) - 48,
8938-
120 - (IMM8 as u32 & 0xff) - 48,
8939-
121 - (IMM8 as u32 & 0xff) - 48,
8940-
122 - (IMM8 as u32 & 0xff) - 48,
8941-
123 - (IMM8 as u32 & 0xff) - 48,
8942-
124 - (IMM8 as u32 & 0xff) - 48,
8943-
125 - (IMM8 as u32 & 0xff) - 48,
8944-
126 - (IMM8 as u32 & 0xff) - 48,
8945-
127 - (IMM8 as u32 & 0xff) - 48,
8890+
mask(IMM8, 0),
8891+
mask(IMM8, 1),
8892+
mask(IMM8, 2),
8893+
mask(IMM8, 3),
8894+
mask(IMM8, 4),
8895+
mask(IMM8, 5),
8896+
mask(IMM8, 6),
8897+
mask(IMM8, 7),
8898+
mask(IMM8, 8),
8899+
mask(IMM8, 9),
8900+
mask(IMM8, 10),
8901+
mask(IMM8, 11),
8902+
mask(IMM8, 12),
8903+
mask(IMM8, 13),
8904+
mask(IMM8, 14),
8905+
mask(IMM8, 15),
8906+
mask(IMM8, 16),
8907+
mask(IMM8, 17),
8908+
mask(IMM8, 18),
8909+
mask(IMM8, 19),
8910+
mask(IMM8, 20),
8911+
mask(IMM8, 21),
8912+
mask(IMM8, 22),
8913+
mask(IMM8, 23),
8914+
mask(IMM8, 24),
8915+
mask(IMM8, 25),
8916+
mask(IMM8, 26),
8917+
mask(IMM8, 27),
8918+
mask(IMM8, 28),
8919+
mask(IMM8, 29),
8920+
mask(IMM8, 30),
8921+
mask(IMM8, 31),
8922+
mask(IMM8, 32),
8923+
mask(IMM8, 33),
8924+
mask(IMM8, 34),
8925+
mask(IMM8, 35),
8926+
mask(IMM8, 36),
8927+
mask(IMM8, 37),
8928+
mask(IMM8, 38),
8929+
mask(IMM8, 39),
8930+
mask(IMM8, 40),
8931+
mask(IMM8, 41),
8932+
mask(IMM8, 42),
8933+
mask(IMM8, 43),
8934+
mask(IMM8, 44),
8935+
mask(IMM8, 45),
8936+
mask(IMM8, 46),
8937+
mask(IMM8, 47),
8938+
mask(IMM8, 48),
8939+
mask(IMM8, 49),
8940+
mask(IMM8, 50),
8941+
mask(IMM8, 51),
8942+
mask(IMM8, 52),
8943+
mask(IMM8, 53),
8944+
mask(IMM8, 54),
8945+
mask(IMM8, 55),
8946+
mask(IMM8, 56),
8947+
mask(IMM8, 57),
8948+
mask(IMM8, 58),
8949+
mask(IMM8, 59),
8950+
mask(IMM8, 60),
8951+
mask(IMM8, 61),
8952+
mask(IMM8, 62),
8953+
mask(IMM8, 63),
89468954
],
89478955
);
89488956
transmute(r)

crates/core_arch/src/x86/sse2.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -425,10 +425,11 @@ pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
425425
#[target_feature(enable = "sse2")]
426426
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
427427
// Index helper for the 128-bit byte shift, taking the shift amount and a byte
// position `i`. This hunk changes it to reduce `shift` to its low 8 bits once,
// up front, and to use that value in both the range check and the index.
// NOTE(review): the call site is outside this hunk; presumably the result is a
// shuffle index over (`zero`, `a`) where values below 16 pick zero bytes — confirm.
const fn mask(shift: i32, i: u32) -> u32 {
428-
if (shift as u32) > 15 {
428+
let shift = shift as u32 & 0xff;
429+
if shift > 15 {
429430
i
430431
} else {
431-
16 - (shift as u32) + i
432+
16 - shift + i
432433
}
433434
}
434435
let zero = _mm_set1_epi8(0).as_i8x16();

0 commit comments

Comments
 (0)