-
Notifications
You must be signed in to change notification settings - Fork 13.5k
AMDGPU: Declare pass control flags in header #102865
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Declare pass control flags in header #102865
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: This will allow them to be shared between the old PM and new PM files. I don't really like needing to expose these globally like this; maybe it would be better to just move TargetPassConfig and the CodeGenPassBuilder into one common file? Full diff: https://github.com/llvm/llvm-project/pull/102865.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index cad4585c5b3013..3409a49fe203f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -74,6 +74,7 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+using namespace llvm::AMDGPU;
namespace {
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
@@ -186,109 +187,95 @@ static VGPRRegisterRegAlloc fastRegAllocVGPR(
"fast", "fast register allocator", createFastVGPRRegisterAllocator);
} // anonymous namespace
-static cl::opt<bool>
-EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
- cl::desc("Run early if-conversion"),
- cl::init(false));
+namespace llvm::AMDGPU {
+cl::opt<bool> EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
+ cl::desc("Run early if-conversion"),
+ cl::init(false));
-static cl::opt<bool>
-OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
- cl::desc("Run pre-RA exec mask optimizations"),
- cl::init(true));
+cl::opt<bool> OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
+ cl::desc("Run pre-RA exec mask optimizations"),
+ cl::init(true));
-static cl::opt<bool>
+cl::opt<bool>
LowerCtorDtor("amdgpu-lower-global-ctor-dtor",
cl::desc("Lower GPU ctor / dtors to globals on the device."),
cl::init(true), cl::Hidden);
// Option to disable vectorizer for tests.
-static cl::opt<bool> EnableLoadStoreVectorizer(
- "amdgpu-load-store-vectorizer",
- cl::desc("Enable load store vectorizer"),
- cl::init(true),
- cl::Hidden);
+cl::opt<bool>
+ EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer",
+ cl::desc("Enable load store vectorizer"),
+ cl::init(true), cl::Hidden);
// Option to control global loads scalarization
-static cl::opt<bool> ScalarizeGlobal(
- "amdgpu-scalarize-global-loads",
- cl::desc("Enable global load scalarization"),
- cl::init(true),
- cl::Hidden);
+cl::opt<bool> ScalarizeGlobal("amdgpu-scalarize-global-loads",
+ cl::desc("Enable global load scalarization"),
+ cl::init(true), cl::Hidden);
// Option to run internalize pass.
-static cl::opt<bool> InternalizeSymbols(
- "amdgpu-internalize-symbols",
- cl::desc("Enable elimination of non-kernel functions and unused globals"),
- cl::init(false),
- cl::Hidden);
+cl::opt<bool> InternalizeSymbols(
+ "amdgpu-internalize-symbols",
+ cl::desc("Enable elimination of non-kernel functions and unused globals"),
+ cl::init(false), cl::Hidden);
// Option to inline all early.
-static cl::opt<bool> EarlyInlineAll(
- "amdgpu-early-inline-all",
- cl::desc("Inline all functions early"),
- cl::init(false),
- cl::Hidden);
+cl::opt<bool> EarlyInlineAll("amdgpu-early-inline-all",
+ cl::desc("Inline all functions early"),
+ cl::init(false), cl::Hidden);
-static cl::opt<bool> RemoveIncompatibleFunctions(
+cl::opt<bool> RemoveIncompatibleFunctions(
"amdgpu-enable-remove-incompatible-functions", cl::Hidden,
cl::desc("Enable removal of functions when they"
"use features not supported by the target GPU"),
cl::init(true));
-static cl::opt<bool> EnableSDWAPeephole(
- "amdgpu-sdwa-peephole",
- cl::desc("Enable SDWA peepholer"),
- cl::init(true));
+cl::opt<bool> EnableSDWAPeephole("amdgpu-sdwa-peephole",
+ cl::desc("Enable SDWA peepholer"),
+ cl::init(true));
-static cl::opt<bool> EnableDPPCombine(
- "amdgpu-dpp-combine",
- cl::desc("Enable DPP combiner"),
- cl::init(true));
+cl::opt<bool> EnableDPPCombine("amdgpu-dpp-combine",
+ cl::desc("Enable DPP combiner"), cl::init(true));
// Enable address space based alias analysis
-static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
- cl::desc("Enable AMDGPU Alias Analysis"),
- cl::init(true));
+cl::opt<bool>
+ EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
+ cl::desc("Enable AMDGPU Alias Analysis"),
+ cl::init(true));
// Option to run late CFG structurizer
-static cl::opt<bool, true> LateCFGStructurize(
- "amdgpu-late-structurize",
- cl::desc("Enable late CFG structurization"),
- cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
- cl::Hidden);
+cl::opt<bool, true> LateCFGStructurize(
+ "amdgpu-late-structurize", cl::desc("Enable late CFG structurization"),
+ cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden);
// Disable structurizer-based control-flow lowering in order to test convergence
// control tokens. This should eventually be replaced by the wave-transform.
-static cl::opt<bool, true> DisableStructurizer(
+cl::opt<bool, true> DisableStructurizer(
"amdgpu-disable-structurizer",
cl::desc("Disable structurizer for experiments; produces unusable code"),
cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden);
// Enable lib calls simplifications
-static cl::opt<bool> EnableLibCallSimplify(
- "amdgpu-simplify-libcall",
- cl::desc("Enable amdgpu library simplifications"),
- cl::init(true),
- cl::Hidden);
-
-static cl::opt<bool> EnableLowerKernelArguments(
- "amdgpu-ir-lower-kernel-arguments",
- cl::desc("Lower kernel argument loads in IR pass"),
- cl::init(true),
- cl::Hidden);
-
-static cl::opt<bool> EnableRegReassign(
- "amdgpu-reassign-regs",
- cl::desc("Enable register reassign optimizations on gfx10+"),
- cl::init(true),
- cl::Hidden);
-
-static cl::opt<bool> OptVGPRLiveRange(
+cl::opt<bool>
+ EnableLibCallSimplify("amdgpu-simplify-libcall",
+ cl::desc("Enable amdgpu library simplifications"),
+ cl::init(true), cl::Hidden);
+
+cl::opt<bool> EnableLowerKernelArguments(
+ "amdgpu-ir-lower-kernel-arguments",
+ cl::desc("Lower kernel argument loads in IR pass"), cl::init(true),
+ cl::Hidden);
+
+cl::opt<bool> EnableRegReassign(
+ "amdgpu-reassign-regs",
+ cl::desc("Enable register reassign optimizations on gfx10+"),
+ cl::init(true), cl::Hidden);
+
+cl::opt<bool> OptVGPRLiveRange(
"amdgpu-opt-vgpr-liverange",
cl::desc("Enable VGPR liverange optimizations for if-else structure"),
cl::init(true), cl::Hidden);
-static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
+cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
"amdgpu-atomic-optimizer-strategy",
cl::desc("Select DPP or Iterative strategy for scan"),
cl::init(ScanOptions::Iterative),
@@ -299,91 +286,85 @@ static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")));
// Enable Mode register optimization
-static cl::opt<bool> EnableSIModeRegisterPass(
- "amdgpu-mode-register",
- cl::desc("Enable mode register pass"),
- cl::init(true),
- cl::Hidden);
+cl::opt<bool> EnableSIModeRegisterPass("amdgpu-mode-register",
+ cl::desc("Enable mode register pass"),
+ cl::init(true), cl::Hidden);
// Enable GFX11.5+ s_singleuse_vdst insertion
-static cl::opt<bool>
+cl::opt<bool>
EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst",
cl::desc("Enable s_singleuse_vdst insertion"),
cl::init(false), cl::Hidden);
// Enable GFX11+ s_delay_alu insertion
-static cl::opt<bool>
- EnableInsertDelayAlu("amdgpu-enable-delay-alu",
- cl::desc("Enable s_delay_alu insertion"),
- cl::init(true), cl::Hidden);
+cl::opt<bool> EnableInsertDelayAlu("amdgpu-enable-delay-alu",
+ cl::desc("Enable s_delay_alu insertion"),
+ cl::init(true), cl::Hidden);
// Enable GFX11+ VOPD
-static cl::opt<bool>
- EnableVOPD("amdgpu-enable-vopd",
- cl::desc("Enable VOPD, dual issue of VALU in wave32"),
- cl::init(true), cl::Hidden);
+cl::opt<bool> EnableVOPD("amdgpu-enable-vopd",
+ cl::desc("Enable VOPD, dual issue of VALU in wave32"),
+ cl::init(true), cl::Hidden);
// Option is used in lit tests to prevent deadcoding of patterns inspected.
-static cl::opt<bool>
-EnableDCEInRA("amdgpu-dce-in-ra",
- cl::init(true), cl::Hidden,
- cl::desc("Enable machine DCE inside regalloc"));
-
-static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
- cl::desc("Adjust wave priority"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool> EnableScalarIRPasses(
- "amdgpu-scalar-ir-passes",
- cl::desc("Enable scalar IR passes"),
- cl::init(true),
- cl::Hidden);
-
-static cl::opt<bool, true> EnableStructurizerWorkarounds(
+cl::opt<bool> EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden,
+ cl::desc("Enable machine DCE inside regalloc"));
+
+cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
+ cl::desc("Adjust wave priority"),
+ cl::init(false), cl::Hidden);
+
+cl::opt<bool> EnableScalarIRPasses("amdgpu-scalar-ir-passes",
+ cl::desc("Enable scalar IR passes"),
+ cl::init(true), cl::Hidden);
+
+cl::opt<bool, true> EnableStructurizerWorkarounds(
"amdgpu-enable-structurizer-workarounds",
cl::desc("Enable workarounds for the StructurizeCFG pass"),
cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds),
cl::init(true), cl::Hidden);
-static cl::opt<bool, true> EnableLowerModuleLDS(
+cl::opt<bool, true> EnableLowerModuleLDS(
"amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
cl::Hidden);
-static cl::opt<bool> EnablePreRAOptimizations(
- "amdgpu-enable-pre-ra-optimizations",
- cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
- cl::Hidden);
+cl::opt<bool>
+ EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations",
+ cl::desc("Enable Pre-RA optimizations pass"),
+ cl::init(true), cl::Hidden);
-static cl::opt<bool> EnablePromoteKernelArguments(
+cl::opt<bool> EnablePromoteKernelArguments(
"amdgpu-enable-promote-kernel-arguments",
cl::desc("Enable promotion of flat kernel pointer arguments to global"),
cl::Hidden, cl::init(true));
-static cl::opt<bool> EnableImageIntrinsicOptimizer(
+cl::opt<bool> EnableImageIntrinsicOptimizer(
"amdgpu-enable-image-intrinsic-optimizer",
cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),
cl::Hidden);
-static cl::opt<bool>
+cl::opt<bool>
EnableLoopPrefetch("amdgpu-loop-prefetch",
cl::desc("Enable loop data prefetch on AMDGPU"),
cl::Hidden, cl::init(false));
-static cl::opt<bool> EnableMaxIlpSchedStrategy(
+cl::opt<bool> EnableMaxIlpSchedStrategy(
"amdgpu-enable-max-ilp-scheduling-strategy",
cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
cl::Hidden, cl::init(false));
-static cl::opt<bool> EnableRewritePartialRegUses(
+cl::opt<bool> EnableRewritePartialRegUses(
"amdgpu-enable-rewrite-partial-reg-uses",
cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),
cl::Hidden);
-static cl::opt<bool> EnableHipStdPar(
- "amdgpu-enable-hipstdpar",
- cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
- cl::Hidden);
+cl::opt<bool>
+ EnableHipStdPar("amdgpu-enable-hipstdpar",
+ cl::desc("Enable HIP Standard Parallelism Offload support"),
+ cl::init(false), cl::Hidden);
+
+} // namespace llvm::AMDGPU
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 4d39ad2b415052..f01e26a846f433 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -16,12 +16,53 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include <optional>
#include <utility>
namespace llvm {
+enum class ScanOptions;
+
+namespace AMDGPU {
+
+extern cl::opt<bool> EnableEarlyIfConversion;
+extern cl::opt<bool> OptExecMaskPreRA;
+extern cl::opt<bool> LowerCtorDtor;
+extern cl::opt<bool> EnableLoadStoreVectorizer;
+extern cl::opt<bool> ScalarizeGlobal;
+extern cl::opt<bool> InternalizeSymbols;
+extern cl::opt<bool> EarlyInlineAll;
+extern cl::opt<bool> RemoveIncompatibleFunctions;
+extern cl::opt<bool> EnableSDWAPeephole;
+extern cl::opt<bool> EnableDPPCombine;
+extern cl::opt<bool> EnableAMDGPUAliasAnalysis;
+extern cl::opt<bool, true> LateCFGStructurize;
+extern cl::opt<bool, true> DisableStructurizer;
+extern cl::opt<bool> EnableLibCallSimplify;
+extern cl::opt<bool> EnableLowerKernelArguments;
+extern cl::opt<bool> EnableRegReassign;
+extern cl::opt<bool> OptVGPRLiveRange;
+extern cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy;
+extern cl::opt<bool> EnableSIModeRegisterPass;
+extern cl::opt<bool> EnableInsertSingleUseVDST;
+extern cl::opt<bool> EnableInsertDelayAlu;
+extern cl::opt<bool> EnableVOPD;
+extern cl::opt<bool> EnableDCEInRA;
+extern cl::opt<bool> EnableSetWavePriority;
+extern cl::opt<bool> EnableScalarIRPasses;
+extern cl::opt<bool, true> EnableStructurizerWorkarounds;
+extern cl::opt<bool, true> EnableLowerModuleLDS;
+extern cl::opt<bool> EnablePreRAOptimizations;
+extern cl::opt<bool> EnablePromoteKernelArguments;
+extern cl::opt<bool> EnableImageIntrinsicOptimizer;
+extern cl::opt<bool> EnableLoopPrefetch;
+extern cl::opt<bool> EnableMaxIlpSchedStrategy;
+extern cl::opt<bool> EnableRewritePartialRegUses;
+extern cl::opt<bool> EnableHipStdPar;
+} // namespace AMDGPU
+
//===----------------------------------------------------------------------===//
// AMDGPU Target Machine (R600+)
//===----------------------------------------------------------------------===//
|
cc51e15
to
c5f7f3c
Compare
414bc71
to
64679cb
Compare
c5f7f3c
to
673d0b7
Compare
64679cb
to
c5882b7
Compare
Yep, I also do not like extern cl::opt. |
Agreed, it is a bad choice to have extern cl::opt and expose these options globally. But combining the legacy and new PM into a single file would make this file quite complex. We have both R600 and AMDGCN pipelines. There is an AMDGPU base class on top of both, in the legacy and in the new path. Would it be OK to have all of them in a single place? |
673d0b7
to
3e142ea
Compare
This will allow them to be shared between the old PM and new PM files. I don't really like needing to expose these globally like this; maybe it would be better to just move TargetPassConfig and the CodeGenPassBuilder into one common file?
c5882b7
to
dc7ab09
Compare
3e142ea
to
4f98ff6
Compare
Moved AMDGPUCodeGenPassBuilder into AMDGPUTargetMachine instead |
This will allow them to be shared between the old PM and new PM files.
I don't really like needing to expose these globally like this; maybe
it would be better to just move TargetPassConfig and the CodeGenPassBuilder
into one common file?