From c564ce0e41c3f7591d547753c33fcdcbbdbcb5ca Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 12 Aug 2024 15:26:25 +0400
Subject: [PATCH] AMDGPU/NewPM: Start filling out addIRPasses

This is not complete, but gets AtomicExpand running. I was able to get
further than I expected; we're quite close to having all the IR codegen
passes ported.
---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 100 ++++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h  |   3 +
 2 files changed, 103 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b27746f9d6a55..c89d9ff906d14 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -65,10 +65,16 @@
 #include "llvm/Transforms/IPO/GlobalDCE.h"
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/Transforms/Scalar/FlattenCFG.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/InferAddressSpaces.h"
+#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
+#include "llvm/Transforms/Scalar/NaryReassociate.h"
+#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
 #include "llvm/Transforms/Scalar/Sink.h"
+#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -1769,6 +1775,70 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
               ShadowStackGCLoweringPass>();
 }
 
+void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
+  // TODO: Missing AMDGPURemoveIncompatibleFunctions
+
+  addPass(AMDGPUPrintfRuntimeBindingPass());
+  if (LowerCtorDtor)
+    addPass(AMDGPUCtorDtorLoweringPass());
+
+  if (isPassEnabled(EnableImageIntrinsicOptimizer))
+    addPass(AMDGPUImageIntrinsicOptimizerPass(TM));
+
+  // This can be disabled by passing ::Disable here or on the command line
+  // with --expand-variadics-override=disable.
+  addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));
+
+  addPass(AMDGPUAlwaysInlinePass());
+  addPass(AlwaysInlinerPass());
+
+  // TODO: Missing OpenCLEnqueuedBlockLowering
+
+  // Runs before PromoteAlloca so the latter can account for function uses.
+  if (EnableLowerModuleLDS)
+    addPass(AMDGPULowerModuleLDSPass(TM));
+
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+    addPass(InferAddressSpacesPass());
+
+  // Run the atomic optimizer before AtomicExpand.
+  if (TM.getOptLevel() >= CodeGenOptLevel::Less &&
+      (AMDGPUAtomicOptimizerStrategy != ScanOptions::None))
+    addPass(AMDGPUAtomicOptimizerPass(TM, AMDGPUAtomicOptimizerStrategy));
+
+  addPass(AtomicExpandPass());
+
+  if (TM.getOptLevel() > CodeGenOptLevel::None) {
+    addPass(AMDGPUPromoteAllocaPass(TM));
+    if (isPassEnabled(EnableScalarIRPasses))
+      addStraightLineScalarOptimizationPasses(addPass);
+
+    // TODO: Handle EnableAMDGPUAliasAnalysis
+
+    // TODO: May want to move later or split into an early and late one.
+    addPass(AMDGPUCodeGenPreparePass(TM));
+
+    // TODO: LICM
+  }
+
+  Base::addIRPasses(addPass);
+
+  // EarlyCSE is not always strong enough to clean up what LSR produces. For
+  // example, GVN can combine
+  //
+  //   %0 = add %a, %b
+  //   %1 = add %b, %a
+  //
+  // and
+  //
+  //   %0 = shl nsw %a, 2
+  //   %1 = shl %a, 2
+  //
+  // but EarlyCSE can do neither of them.
+  if (isPassEnabled(EnableScalarIRPasses))
+    addEarlyCSEOrGVNPass(addPass);
+}
+
 void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
   // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
   // deleted soon.
@@ -1875,3 +1945,33 @@ bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
     return false;
   return Opt;
 }
+
+void AMDGPUCodeGenPassBuilder::addEarlyCSEOrGVNPass(AddIRPass &addPass) const {
+  if (TM.getOptLevel() == CodeGenOptLevel::Aggressive)
+    addPass(GVNPass());
+  else
+    addPass(EarlyCSEPass());
+}
+
+void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses(
+    AddIRPass &addPass) const {
+  if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
+    addPass(LoopDataPrefetchPass());
+
+  addPass(SeparateConstOffsetFromGEPPass());
+
+  // ReassociateGEPs exposes more opportunities for SLSR. See
+  // the example in reassociate-geps-and-slsr.ll.
+  addPass(StraightLineStrengthReducePass());
+
+  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN
+  // or EarlyCSE can reuse.
+  addEarlyCSEOrGVNPass(addPass);
+
+  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
+  addPass(NaryReassociatePass());
+
+  // NaryReassociate on GEPs creates redundant common expressions, so run
+  // EarlyCSE after it.
+  addPass(EarlyCSEPass());
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 9ee406154b9b7..27ff0da5e812f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -172,6 +172,7 @@ class AMDGPUCodeGenPassBuilder
                            const CGPassBuilderOption &Opts,
                            PassInstrumentationCallbacks *PIC);
 
+  void addIRPasses(AddIRPass &) const;
   void addCodeGenPrepare(AddIRPass &) const;
   void addPreISel(AddIRPass &addPass) const;
   void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
@@ -182,6 +183,8 @@ class AMDGPUCodeGenPassBuilder
   /// given that a pass shall work at an optimization \p Level minimum.
   bool isPassEnabled(const cl::opt<bool> &Opt,
                      CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
+  void addEarlyCSEOrGVNPass(AddIRPass &) const;
+  void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
 };
 
 } // end namespace llvm
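
For reference, the GVN-versus-EarlyCSE example cited in the new addIRPasses
comment can be written as a standalone function roughly as below (the function
and value names are illustrative only and are not part of the patch). Per that
comment, GVN is expected to treat the two commuted adds as the same value,
while EarlyCSE is not:

  ; Sketch of the commuted-operand case from the addIRPasses comment.
  define i32 @commuted_add(i32 %a, i32 %b) {
  entry:
    %x = add i32 %a, %b
    %y = add i32 %b, %a
    %sum = add i32 %x, %y
    ret i32 %sum
  }

The flag-mismatch case in the same comment follows the same pattern, with the
two shl instructions differing only in the nsw poison-generating flag.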