@@ -1770,8 +1770,35 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
1770
1770
}
1771
1771
1772
1772
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare (AddIRPass &addPass) const {
1773
+ // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
1774
+ // deleted soon.
1775
+
1776
+ if (EnableLowerKernelArguments)
1777
+ addPass (AMDGPULowerKernelArgumentsPass (TM));
1778
+
1779
+ // This lowering has been placed after codegenprepare to take advantage of
1780
+ // address mode matching (which is why it isn't put with the LDS lowerings).
1781
+ // It could be placed anywhere before uniformity annotations (an analysis
1782
+ // that it changes by splitting up fat pointers into their components)
1783
+ // but has been put before switch lowering and CFG flattening so that those
1784
+ // passes can run on the more optimized control flow this pass creates in
1785
+ // many cases.
1786
+ //
1787
+ // FIXME: This should ideally be put after the LoadStoreVectorizer.
1788
+ // However, due to some annoying facts about ResourceUsageAnalysis,
1789
+ // (especially as exercised in the resource-usage-dead-function test),
1790
+ // we need all the function passes from codegenprepare all the way through
1791
+ // said resource usage analysis to run on the call graph produced
1792
+ // before codegenprepare runs (because codegenprepare will knock some
1793
+ // nodes out of the graph, which leads to function-level passes not
1794
+ // being run on them, which causes crashes in the resource usage analysis).
1795
+ addPass (AMDGPULowerBufferFatPointersPass (TM));
1796
+
1773
1797
Base::addCodeGenPrepare (addPass);
1774
1798
1799
+ if (isPassEnabled (EnableLoadStoreVectorizer))
1800
+ addPass (LoadStoreVectorizerPass ());
1801
+
1775
1802
// LowerSwitch pass may introduce unreachable blocks that can cause unexpected
1776
1803
// behavior for subsequent passes. Placing it here seems better, so that these
1777
1804
// blocks would get cleaned up by UnreachableBlockElim inserted next in the
@@ -1839,3 +1866,12 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
1839
1866
addPass (SILowerI1CopiesPass ());
1840
1867
return Error::success ();
1841
1868
}
1869
+
1870
+ bool AMDGPUCodeGenPassBuilder::isPassEnabled (const cl::opt<bool > &Opt,
1871
+ CodeGenOptLevel Level) const {
1872
+ if (Opt.getNumOccurrences ())
1873
+ return Opt;
1874
+ if (TM.getOptLevel () < Level)
1875
+ return false ;
1876
+ return Opt;
1877
+ }
0 commit comments