Skip to content

Commit ece9d35

Browse files
aemersontstellar
authored andcommitted
[GlobalISel] Fix store merging incorrectly classifying an unknown index expr as 0. (#90375)
During analysis, we incorrectly leave the offset part of an address info struct as zero, when in actual fact we failed to decompose it into base + offset. This results in incorrectly assuming that the address is adjacent to another store addr. To fix this we wrap the offset in an optional<> so we can distinguish between real zero and unknown. Fixes issue #90242 (cherry picked from commit 19f4d68)
1 parent a7b8b89 commit ece9d35

File tree

4 files changed

+115
-34
lines changed

4 files changed

+115
-34
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h

+16-4
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,23 @@ struct LegalityQuery;
3535
class MachineRegisterInfo;
3636
namespace GISelAddressing {
3737
/// Helper struct to store a base, index and offset that forms an address
38-
struct BaseIndexOffset {
38+
class BaseIndexOffset {
39+
private:
3940
Register BaseReg;
4041
Register IndexReg;
41-
int64_t Offset = 0;
42-
bool IsIndexSignExt = false;
42+
std::optional<int64_t> Offset;
43+
44+
public:
45+
BaseIndexOffset() = default;
46+
Register getBase() { return BaseReg; }
47+
Register getBase() const { return BaseReg; }
48+
Register getIndex() { return IndexReg; }
49+
Register getIndex() const { return IndexReg; }
50+
void setBase(Register NewBase) { BaseReg = NewBase; }
51+
void setIndex(Register NewIndex) { IndexReg = NewIndex; }
52+
void setOffset(std::optional<int64_t> NewOff) { Offset = NewOff; }
53+
bool hasValidOffset() const { return Offset.has_value(); }
54+
int64_t getOffset() const { return *Offset; }
4355
};
4456

4557
/// Returns a BaseIndexOffset which describes the pointer in \p Ptr.
@@ -89,7 +101,7 @@ class LoadStoreOpt : public MachineFunctionPass {
89101
// order stores are writing to incremeneting consecutive addresses. So when
90102
// we walk the block in reverse order, the next eligible store must write to
91103
// an offset one store width lower than CurrentLowestOffset.
92-
uint64_t CurrentLowestOffset;
104+
int64_t CurrentLowestOffset;
93105
SmallVector<GStore *> Stores;
94106
// A vector of MachineInstr/unsigned pairs to denote potential aliases that
95107
// need to be checked before the candidate is considered safe to merge. The

llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp

+28-20
Original file line numberDiff line numberDiff line change
@@ -84,21 +84,20 @@ BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
8484
MachineRegisterInfo &MRI) {
8585
BaseIndexOffset Info;
8686
Register PtrAddRHS;
87-
if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
88-
Info.BaseReg = Ptr;
89-
Info.IndexReg = Register();
90-
Info.IsIndexSignExt = false;
87+
Register BaseReg;
88+
if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(BaseReg), m_Reg(PtrAddRHS)))) {
89+
Info.setBase(Ptr);
90+
Info.setOffset(0);
9191
return Info;
9292
}
93-
93+
Info.setBase(BaseReg);
9494
auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
9595
if (RHSCst)
96-
Info.Offset = RHSCst->Value.getSExtValue();
96+
Info.setOffset(RHSCst->Value.getSExtValue());
9797

9898
// Just recognize a simple case for now. In future we'll need to match
9999
// indexing patterns for base + index + constant.
100-
Info.IndexReg = PtrAddRHS;
101-
Info.IsIndexSignExt = false;
100+
Info.setIndex(PtrAddRHS);
102101
return Info;
103102
}
104103

@@ -114,15 +113,16 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
114113
BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
115114
BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
116115

117-
if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
116+
if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
118117
return false;
119118

120119
int64_t Size1 = LdSt1->getMemSize();
121120
int64_t Size2 = LdSt2->getMemSize();
122121

123122
int64_t PtrDiff;
124-
if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
125-
PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
123+
if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
124+
BasePtr1.hasValidOffset()) {
125+
PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
126126
// If the size of memory access is unknown, do not use it to do analysis.
127127
// One example of unknown size memory access is to load/store scalable
128128
// vector objects on the stack.
@@ -151,8 +151,8 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
151151
// able to calculate their relative offset if at least one arises
152152
// from an alloca. However, these allocas cannot overlap and we
153153
// can infer there is no alias.
154-
auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
155-
auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
154+
auto *Base0Def = getDefIgnoringCopies(BasePtr0.getBase(), MRI);
155+
auto *Base1Def = getDefIgnoringCopies(BasePtr1.getBase(), MRI);
156156
if (!Base0Def || !Base1Def)
157157
return false; // Couldn't tell anything.
158158

@@ -520,16 +520,20 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
520520

521521
Register StoreAddr = StoreMI.getPointerReg();
522522
auto BIO = getPointerInfo(StoreAddr, *MRI);
523-
Register StoreBase = BIO.BaseReg;
524-
uint64_t StoreOffCst = BIO.Offset;
523+
Register StoreBase = BIO.getBase();
525524
if (C.Stores.empty()) {
525+
C.BasePtr = StoreBase;
526+
if (!BIO.hasValidOffset()) {
527+
C.CurrentLowestOffset = 0;
528+
} else {
529+
C.CurrentLowestOffset = BIO.getOffset();
530+
}
526531
// This is the first store of the candidate.
527532
// If the offset can't possibly allow for a lower addressed store with the
528533
// same base, don't bother adding it.
529-
if (StoreOffCst < ValueTy.getSizeInBytes())
534+
if (BIO.hasValidOffset() &&
535+
BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes()))
530536
return false;
531-
C.BasePtr = StoreBase;
532-
C.CurrentLowestOffset = StoreOffCst;
533537
C.Stores.emplace_back(&StoreMI);
534538
LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
535539
<< StoreMI);
@@ -549,8 +553,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
549553
// writes to the next lowest adjacent address.
550554
if (C.BasePtr != StoreBase)
551555
return false;
552-
if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
553-
static_cast<uint64_t>(StoreOffCst))
556+
// If we don't have a valid offset, we can't guarantee to be an adjacent
557+
// offset.
558+
if (!BIO.hasValidOffset())
559+
return false;
560+
if ((C.CurrentLowestOffset -
561+
static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
554562
return false;
555563

556564
// This writes to an adjacent address. Allow it.

llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll

+19
Original file line numberDiff line numberDiff line change
@@ -323,3 +323,22 @@ define i32 @test_alias_3xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr noalias %safe_p
323323
store i32 14, ptr %addr4
324324
ret i32 %safeld
325325
}
326+
327+
@G = external global [10 x i32]
328+
329+
define void @invalid_zero_offset_no_merge(i64 %0) {
330+
; CHECK-LABEL: invalid_zero_offset_no_merge:
331+
; CHECK: ; %bb.0:
332+
; CHECK-NEXT: Lloh0:
333+
; CHECK-NEXT: adrp x8, _G@GOTPAGE
334+
; CHECK-NEXT: Lloh1:
335+
; CHECK-NEXT: ldr x8, [x8, _G@GOTPAGEOFF]
336+
; CHECK-NEXT: str wzr, [x8, x0, lsl #2]
337+
; CHECK-NEXT: str wzr, [x8, #4]
338+
; CHECK-NEXT: ret
339+
; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
340+
%2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
341+
store i32 0, ptr %2, align 4
342+
store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
343+
ret void
344+
}

llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir

+52-10
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,13 @@
162162
ret void
163163
}
164164

165+
@G = external global [10 x i32]
166+
define void @invalid_zero_offset_no_merge(i64 %0) {
167+
%2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
168+
store i32 0, ptr %2, align 4
169+
store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
170+
ret void
171+
}
165172
...
166173
---
167174
name: test_simple_2xs8
@@ -582,13 +589,11 @@ liveins:
582589
frameInfo:
583590
maxAlignment: 1
584591
machineFunctionInfo: {}
592+
# The store to ptr2 prevents merging into a single store.
593+
# We can still merge the stores into addr1 and addr2.
585594
body: |
586595
bb.1 (%ir-block.0):
587596
liveins: $x0, $x1
588-
589-
; The store to ptr2 prevents merging into a single store.
590-
; We can still merge the stores into addr1 and addr2.
591-
592597
; CHECK-LABEL: name: test_alias_4xs16
593598
; CHECK: liveins: $x0, $x1
594599
; CHECK-NEXT: {{ $}}
@@ -639,10 +644,10 @@ liveins:
639644
frameInfo:
640645
maxAlignment: 1
641646
machineFunctionInfo: {}
647+
# Here store of 5 and 9 can be merged, others have aliasing barriers.
642648
body: |
643649
bb.1 (%ir-block.0):
644650
liveins: $x0, $x1, $x2
645-
; Here store of 5 and 9 can be merged, others have aliasing barriers.
646651
; CHECK-LABEL: name: test_alias2_4xs16
647652
; CHECK: liveins: $x0, $x1, $x2
648653
; CHECK-NEXT: {{ $}}
@@ -698,12 +703,11 @@ liveins:
698703
frameInfo:
699704
maxAlignment: 1
700705
machineFunctionInfo: {}
706+
# No merging can be done here.
701707
body: |
702708
bb.1 (%ir-block.0):
703709
liveins: $x0, $x1, $x2, $x3
704710
705-
; No merging can be done here.
706-
707711
; CHECK-LABEL: name: test_alias3_4xs16
708712
; CHECK: liveins: $x0, $x1, $x2, $x3
709713
; CHECK-NEXT: {{ $}}
@@ -767,12 +771,10 @@ stack:
767771
- { id: 0, name: a1, size: 24, alignment: 4 }
768772
- { id: 1, name: a2, size: 4, alignment: 4 }
769773
machineFunctionInfo: {}
774+
# Can merge because the load is from a different alloca and can't alias.
770775
body: |
771776
bb.1 (%ir-block.0):
772777
liveins: $x0
773-
774-
; Can merge because the load is from a different alloca and can't alias.
775-
776778
; CHECK-LABEL: name: test_alias_allocas_2xs32
777779
; CHECK: liveins: $x0
778780
; CHECK-NEXT: {{ $}}
@@ -826,3 +828,43 @@ body: |
826828
RET_ReallyLR
827829
828830
...
831+
---
832+
name: invalid_zero_offset_no_merge
833+
alignment: 4
834+
tracksRegLiveness: true
835+
liveins:
836+
- { reg: '$x0' }
837+
frameInfo:
838+
maxAlignment: 1
839+
machineFunctionInfo: {}
840+
body: |
841+
bb.1 (%ir-block.1):
842+
liveins: $x0
843+
844+
; CHECK-LABEL: name: invalid_zero_offset_no_merge
845+
; CHECK: liveins: $x0
846+
; CHECK-NEXT: {{ $}}
847+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
848+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
849+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64)
850+
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @G
851+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[SHL]](s64)
852+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
853+
; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.2)
854+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
855+
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[GV]], [[C2]](s64)
856+
; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
857+
; CHECK-NEXT: RET_ReallyLR
858+
%0:_(s64) = COPY $x0
859+
%9:_(s64) = G_CONSTANT i64 2
860+
%3:_(s64) = G_SHL %0, %9(s64)
861+
%1:_(p0) = G_GLOBAL_VALUE @G
862+
%4:_(p0) = G_PTR_ADD %1, %3(s64)
863+
%6:_(s32) = G_CONSTANT i32 0
864+
G_STORE %6(s32), %4(p0) :: (store (s32) into %ir.2)
865+
%8:_(s64) = G_CONSTANT i64 4
866+
%7:_(p0) = nuw G_PTR_ADD %1, %8(s64)
867+
G_STORE %6(s32), %7(p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
868+
RET_ReallyLR
869+
870+
...

0 commit comments

Comments
 (0)