Skip to content

Commit 01a7d4e

Browse files
authored
[AMDGPU] Allow selection of BITOP3 for some 2 opcodes and B32 cases (#122267)
This came up in downstream static analysis as dead code. Admittedly, the right fix depends on what the intention was behind the [`if (NumOpcodes == 2 && IsB32)`](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp#L3792C3-L3792C32) check; I took a guess that selection should still take place for certain of those cases. If that's incorrect, that whole `if` statement can be removed instead, since it comes after the [`if (NumOpcodes < 4)`](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp#L3788) check, which already returns early and therefore makes it unreachable.
1 parent 4c0a0f7 commit 01a7d4e

File tree

2 files changed

+17
-22
lines changed

2 files changed

+17
-22
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3782,13 +3782,7 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
37823782
if (NumOpcodes < 2 || Src.empty())
37833783
return false;
37843784

3785-
// For a uniform case threshold should be higher to account for moves between
3786-
// VGPRs and SGPRs. It needs one operand in a VGPR, rest two can be in SGPRs
3787-
// and a readfirstlane after.
3788-
if (NumOpcodes < 4)
3789-
return false;
3790-
3791-
bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
3785+
const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
37923786
if (NumOpcodes == 2 && IsB32) {
37933787
// Avoid using BITOP3 for OR3, XOR3, AND_OR. This is not faster but makes
37943788
// asm more readable. This cannot be modeled with AddedComplexity because
@@ -3797,6 +3791,11 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
37973791
mi_match(MI, *MRI, m_GOr(m_GOr(m_Reg(), m_Reg()), m_Reg())) ||
37983792
mi_match(MI, *MRI, m_GOr(m_GAnd(m_Reg(), m_Reg()), m_Reg())))
37993793
return false;
3794+
} else if (NumOpcodes < 4) {
3795+
// For a uniform case threshold should be higher to account for moves
3796+
// between VGPRs and SGPRs. It needs one operand in a VGPR, rest two can be
3797+
// in SGPRs and a readfirstlane after.
3798+
return false;
38003799
}
38013800

38023801
unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;

llvm/test/CodeGen/AMDGPU/bitop3.ll

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,7 @@ define amdgpu_ps float @not_and_and_and(i32 %a, i32 %b, i32 %c) {
5252
;
5353
; GFX950-GISEL-LABEL: not_and_and_and:
5454
; GFX950-GISEL: ; %bb.0:
55-
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
56-
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
55+
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc
5756
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
5857
; GFX950-GISEL-NEXT: ; return to shader part epilog
5958
%nota = xor i32 %a, -1
@@ -103,8 +102,7 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) {
103102
;
104103
; GFX950-GISEL-LABEL: and_and_not_and:
105104
; GFX950-GISEL: ; %bb.0:
106-
; GFX950-GISEL-NEXT: v_not_b32_e32 v2, v2
107-
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
105+
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0x30
108106
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
109107
; GFX950-GISEL-NEXT: ; return to shader part epilog
110108
%notc = xor i32 %c, -1
@@ -122,8 +120,7 @@ define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
122120
;
123121
; GFX950-GISEL-LABEL: and_and_and:
124122
; GFX950-GISEL: ; %bb.0:
125-
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
126-
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
123+
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
127124
; GFX950-GISEL-NEXT: ; return to shader part epilog
128125
%and1 = and i32 %a, %c
129126
%and2 = and i32 %and1, %b
@@ -141,8 +138,7 @@ define amdgpu_ps float @test_12(i32 %a, i32 %b) {
141138
;
142139
; GFX950-GISEL-LABEL: test_12:
143140
; GFX950-GISEL: ; %bb.0:
144-
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
145-
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
141+
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
146142
; GFX950-GISEL-NEXT: ; return to shader part epilog
147143
%nota = xor i32 %a, -1
148144
%and1 = and i32 %nota, %b
@@ -214,9 +210,11 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) {
214210
;
215211
; GFX950-GISEL-LABEL: test_12_src_overflow:
216212
; GFX950-GISEL: ; %bb.0:
217-
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
218-
; GFX950-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v0
219-
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
213+
; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v0
214+
; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v2
215+
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc
216+
; GFX950-GISEL-NEXT: v_and_b32_e32 v2, v3, v4
217+
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xc8
220218
; GFX950-GISEL-NEXT: ; return to shader part epilog
221219
%nota = xor i32 %a, -1
222220
%notc = xor i32 %c, -1
@@ -242,11 +240,9 @@ define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) {
242240
;
243241
; GFX950-GISEL-LABEL: test_100_src_overflow:
244242
; GFX950-GISEL: ; %bb.0:
245-
; GFX950-GISEL-NEXT: v_or_b32_e32 v3, v2, v0
246-
; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v3
247-
; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v1
243+
; GFX950-GISEL-NEXT: v_bitop3_b32 v3, v2, v0, v2 bitop3:3
248244
; GFX950-GISEL-NEXT: v_and_b32_e32 v3, v1, v3
249-
; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v0, v4
245+
; GFX950-GISEL-NEXT: v_bitop3_b32 v4, v0, v1, v0 bitop3:0x30
250246
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v1, v0
251247
; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v2
252248
; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v4, v2

0 commit comments

Comments
 (0)