@@ -1773,8 +1773,35 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
1773
1773
}
1774
1774
1775
1775
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare (AddIRPass &addPass) const {
1776
+ // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
1777
+ // deleted soon.
1778
+
1779
+ if (EnableLowerKernelArguments)
1780
+ addPass (AMDGPULowerKernelArgumentsPass (TM));
1781
+
1782
+ // This lowering has been placed after codegenprepare to take advantage of
1783
+ // address mode matching (which is why it isn't put with the LDS lowerings).
1784
+ // It could be placed anywhere before uniformity annotations (an analysis
1785
+ // that it changes by splitting up fat pointers into their components)
1786
+ // but has been put before switch lowering and CFG flattening so that those
1787
+ // passes can run on the more optimized control flow this pass creates in
1788
+ // many cases.
1789
+ //
1790
+ // FIXME: This should ideally be put after the LoadStoreVectorizer.
1791
+ // However, due to some annoying facts about ResourceUsageAnalysis,
1792
+ // (especially as exercised in the resource-usage-dead-function test),
1793
+ // we need all the function passes from codegenprepare all the way through
1794
+ // said resource usage analysis to run on the call graph produced
1795
+ // before codegenprepare runs (because codegenprepare will knock some
1796
+ // nodes out of the graph, which leads to function-level passes not
1797
+ // being run on them, which causes crashes in the resource usage analysis).
1798
+ addPass (AMDGPULowerBufferFatPointersPass (TM));
1799
+
1776
1800
Base::addCodeGenPrepare (addPass);
1777
1801
1802
+ if (isPassEnabled (EnableLoadStoreVectorizer))
1803
+ addPass (LoadStoreVectorizerPass ());
1804
+
1778
1805
// LowerSwitch pass may introduce unreachable blocks that can cause unexpected
1779
1806
// behavior for subsequent passes. Placing it here seems better so that these
1780
1807
// blocks would get cleaned up by UnreachableBlockElim inserted next in the
0 commit comments