Skip to content

Commit 2a74bc6

Browse files
committed
Fixups
1 parent 14d2b4c commit 2a74bc6

File tree

3 files changed

+223
-194
lines changed

3 files changed

+223
-194
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

+14-13
Original file line numberDiff line numberDiff line change
@@ -4332,13 +4332,9 @@ static bool expandFillPPRFromZPRSlotPseudo(
43324332
UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI);
43334333

43344334
Register PredReg = AArch64::NoRegister;
4335-
std::optional<ScopedScavengeOrSpill> FindPPR3bReg;
4336-
if (AArch64::PPR_3bRegClass.contains(MI.getOperand(0).getReg()))
4337-
PredReg = MI.getOperand(0).getReg();
4338-
else
4339-
FindPPR3bReg.emplace(MF, MBB, MachineBasicBlock::iterator(MI), PredReg,
4340-
AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs,
4341-
PPR3bRegs, SpillSlots.PPRSpillFI);
4335+
ScopedScavengeOrSpill FindPPR3bReg(
4336+
MF, MBB, MachineBasicBlock::iterator(MI), PredReg, AArch64::P0,
4337+
AArch64::PPR_3bRegClass, UsedRegs, PPR3bRegs, SpillSlots.PPRSpillFI);
43424338

43434339
// Elide NZCV spills if we know it is not used.
43444340
Register NZCVSaveReg = AArch64::NoRegister;
@@ -4350,8 +4346,7 @@ static bool expandFillPPRFromZPRSlotPseudo(
43504346
SpillSlots.GPRSpillFI);
43514347

43524348
#ifndef NDEBUG
4353-
bool Spilled = FindZPRReg.hasSpilled() ||
4354-
(FindPPR3bReg && FindPPR3bReg->hasSpilled()) ||
4349+
bool Spilled = FindZPRReg.hasSpilled() || FindPPR3bReg.hasSpilled() ||
43554350
(FindGPRReg && FindGPRReg->hasSpilled());
43564351
bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) ||
43574352
MI.getFlag(MachineInstr::FrameDestroy);
@@ -4393,7 +4388,7 @@ static bool expandFillPPRFromZPRSlotPseudo(
43934388
.getInstr());
43944389

43954390
propagateFrameFlags(MI, MachineInstrs);
4396-
return FindPPR3bReg && FindPPR3bReg->hasSpilled();
4391+
return FindPPR3bReg.hasSpilled();
43974392
}
43984393

43994394
/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO
@@ -4446,6 +4441,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
44464441
if (CSRMask)
44474442
ScavengeableRegs.clearBitsInMask(CSRMask);
44484443
// TODO: Allow reusing callee-saved registers that have been saved.
4444+
assert(ScavengeableRegs.count() > 0 && "Expected scavengeable registers");
44494445
return ScavengeableRegs;
44504446
};
44514447

@@ -4471,9 +4467,15 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
44714467

44724468
EmergencyStackSlots SpillSlots;
44734469
for (MachineBasicBlock &MBB : MF) {
4470+
// In the case we had to spill a predicate (in the range p0-p7) to reload
4471+
// a predicate (>= p8), additional spill/fill pseudos will be created.
4472+
// These need an additional expansion pass. Note: There will only be at
4473+
// most two expansion passes, as spilling/filling a predicate in the range
4474+
// p0-p7 never requires spilling another predicate.
44744475
for (int Pass = 0; Pass < 2; Pass++) {
44754476
bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos(
44764477
MBB, TRI, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots);
4478+
assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills");
44774479
if (!HasPPRSpills)
44784480
break;
44794481
}
@@ -5524,9 +5526,8 @@ void AArch64FrameLowering::emitRemarks(
55245526
if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
55255527
// SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO
55265528
// spill/fill the predicate as a data vector (so are an FPR access).
5527-
if (!is_contained({AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO,
5528-
AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO},
5529-
MI.getOpcode()) &&
5529+
if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
5530+
MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
55305531
AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
55315532
RegTy = StackAccess::PPR;
55325533
else

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ unsigned AArch64Subtarget::getHwModeSet() const {
414414
//
415415
// FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at
416416
// CPU features.
417-
if (EnableZPRPredicateSpills.getValue() &&
417+
if (EnableZPRPredicateSpills.getValue() && getStreamingHazardSize() > 0 &&
418418
(isStreaming() || isStreamingCompatible())) {
419419
Modes |= (1 << 0);
420420
}

0 commit comments

Comments
 (0)