From 1b5f9f7525209e8c029e7c2cfd35bd43d35980d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Mon, 4 Sep 2023 21:10:15 +0200 Subject: [PATCH] AMDGPU: Scratch instructions are trivially disjoint from SMEM and buffer instructions Scratch instructions are always in addrspace(5), which can only alias with flat (and itself). SMEM and buffer instructions can never reference those address spaces, so they are trivially disjoint. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 17 ++++++++++++++--- llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll | 11 +++++------ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 5b553b1136b34..38b5e0114903c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3425,19 +3425,30 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, if (isMUBUF(MIb) || isMTBUF(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(MIb) && !isSMRD(MIb); + if (isFLAT(MIb)) + return isFLATScratch(MIb); + + return !isSMRD(MIb); } if (isSMRD(MIa)) { if (isSMRD(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(MIb) && !isMUBUF(MIb) && !isMTBUF(MIb); + if (isFLAT(MIb)) + return isFLATScratch(MIb); + + return !isMUBUF(MIb) && !isMTBUF(MIb); } if (isFLAT(MIa)) { - if (isFLAT(MIb)) + if (isFLAT(MIb)) { + if ((isFLATScratch(MIa) && isFLATGlobal(MIb)) || + (isFLATGlobal(MIa) && isFLATScratch(MIb))) + return true; + return checkInstOffsetsDoNotOverlap(MIa, MIb); + } return false; } diff --git a/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll b/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll index 49e434e2dd30c..29c82db6f8204 100644 --- a/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll +++ b/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll @@ -5,15 +5,14 @@ define amdgpu_gfx void @example(<4 x i32> inreg %rsrc, ptr addrspace(5) %src, i3 ; CHECK-LABEL: example: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_add_nc_u32_e32 v3, 4, v0 +; CHECK-NEXT: s_clause 0x1 ; CHECK-NEXT: scratch_load_b32 v2, v0, off -; CHECK-NEXT: v_add_nc_u32_e32 v0, 4, v0 +; CHECK-NEXT: scratch_load_b32 v3, v3, off ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_b32 v2, v1, s[4:7], 0 offen -; CHECK-NEXT: scratch_load_b32 v0, v0, off -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_b32 v0, v1, s[4:7], 0 offen offset:4 +; CHECK-NEXT: buffer_store_b64 v[2:3], v1, s[4:7], 0 offen ; CHECK-NEXT: s_setpc_b64 s[30:31] -; + %x0 = load i32, ptr addrspace(5) %src call void @llvm.amdgcn.raw.buffer.store.i32(i32 %x0, <4 x i32> %rsrc, i32 %dst, i32 0, i32 0) %src1 = getelementptr i8, ptr addrspace(5) %src, i32 4