Skip to content

Commit d537606

Browse files
committed
AMDGPU/NewPM: Fill out passes in addCodeGenPrepare
AMDGPUAnnotateKernelFeatures hasn't been ported yet, but it should be removable soon.
1 parent 7022498 commit d537606

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+36
Original file line numberDiff line numberDiff line change
@@ -1770,8 +1770,35 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
17701770
}
17711771

17721772
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
1773+
// AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
1774+
// deleted soon.
1775+
1776+
if (EnableLowerKernelArguments)
1777+
addPass(AMDGPULowerKernelArgumentsPass(TM));
1778+
1779+
// This lowering has been placed after codegenprepare to take advantage of
1780+
// address mode matching (which is why it isn't put with the LDS lowerings).
1781+
// It could be placed anywhere before uniformity annotations (an analysis
1782+
// that it changes by splitting up fat pointers into their components)
1783+
// but has been put before switch lowering and CFG flattening so that those
1784+
// passes can run on the more optimized control flow this pass creates in
1785+
// many cases.
1786+
//
1787+
// FIXME: This should ideally be put after the LoadStoreVectorizer.
1788+
// However, due to some annoying facts about ResourceUsageAnalysis,
1789+
// (especially as exercised in the resource-usage-dead-function test),
1790+
// we need all the function passes from codegenprepare all the way through
1791+
// said resource usage analysis to run on the call graph produced
1792+
// before codegenprepare runs (because codegenprepare will knock some
1793+
// nodes out of the graph, which leads to function-level passes not
1794+
// being run on them, which causes crashes in the resource usage analysis).
1795+
addPass(AMDGPULowerBufferFatPointersPass(TM));
1796+
17731797
Base::addCodeGenPrepare(addPass);
17741798

1799+
if (isPassEnabled(EnableLoadStoreVectorizer))
1800+
addPass(LoadStoreVectorizerPass());
1801+
17751802
// LowerSwitch pass may introduce unreachable blocks that can cause unexpected
17761803
// behavior for subsequent passes. Placing it here seems better, so that these
17771804
// blocks would get cleaned up by UnreachableBlockElim inserted next in the
@@ -1839,3 +1866,12 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
18391866
addPass(SILowerI1CopiesPass());
18401867
return Error::success();
18411868
}
1869+
1870+
/// Decide whether an optional pass should be scheduled.
///
/// An option that was explicitly specified always determines the result.
/// When it was not specified, the pass is disabled if the target machine's
/// optimization level is below \p Level; otherwise the option's current
/// (default) value is returned.
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
                                             CodeGenOptLevel Level) const {
  const bool ExplicitlySet = Opt.getNumOccurrences() != 0;

  // Only fall back to the optimization-level gate when the user did not
  // specify the option themselves.
  if (!ExplicitlySet && TM.getOptLevel() < Level)
    return false;

  return Opt;
}

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

+6
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,12 @@ class AMDGPUCodeGenPassBuilder
176176
void addPreISel(AddIRPass &addPass) const;
177177
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
178178
Error addInstSelector(AddMachinePass &) const;
179+
180+
/// Check whether a pass is enabled by the option \p Opt. An explicitly given
181+
/// option always wins. Otherwise the pass is disabled below optimization level
182+
/// \p Level, and the option's default value is used at or above it.
183+
bool isPassEnabled(const cl::opt<bool> &Opt,
184+
CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
179185
};
180186

181187
} // end namespace llvm

0 commit comments

Comments
 (0)