-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[RegAlloc][NewPM] Plug Greedy RA in codegen pipeline #120557
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RegAlloc][NewPM] Plug Greedy RA in codegen pipeline #120557
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
c779444
to
6345993
Compare
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-aarch64 Author: Akshat Oke (optimisan) ChangesUse
Full diff: https://github.com/llvm/llvm-project/pull/120557.diff 18 Files Affected:
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 28768a72c83fa3..2dbcd7122f6aa8 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1059,7 +1059,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization(
///
/// A target that uses the standard regalloc pass order for fast or optimized
/// allocation may still override this for per-target regalloc
-/// selection. But -regalloc=... always takes precedence.
+/// selection. But -regalloc-npm=... always takes precedence.
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
AddMachinePass &addPass, bool Optimized) const {
@@ -1075,7 +1075,22 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
AddMachinePass &addPass, bool Optimized) const {
- // TODO: Parse Opt.RegAlloc to add register allocator.
+ // Use the specified -regalloc-npm={basic|greedy|fast|pbqp}
+ if (Opt.RegAlloc > RegAllocType::Default) {
+ switch (Opt.RegAlloc) {
+ case RegAllocType::Fast:
+ addPass(RegAllocFastPass());
+ break;
+ case RegAllocType::Greedy:
+ addPass(RAGreedyPass());
+ break;
+ default:
+ report_fatal_error("Register allocator not supported yet.", false);
+ }
+ return;
+ }
+ // -regalloc=default or unspecified, so pick based on the optimization level.
+ derived().addTargetRegisterAllocator(addPass, Optimized);
}
template <typename Derived, typename TargetMachineT>
@@ -1146,20 +1161,23 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
// PreRA instruction scheduling.
addPass(MachineSchedulerPass());
- if (derived().addRegAssignmentOptimized(addPass)) {
- // Allow targets to expand pseudo instructions depending on the choice of
- // registers before MachineCopyPropagation.
- derived().addPostRewrite(addPass);
+ if (auto E = derived().addRegAssignmentOptimized(addPass)) {
+ // addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
+ // FIXME: This is not really an error.
+ return;
+ }
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ derived().addPostRewrite(addPass);
- // Copy propagate to forward register uses and try to eliminate COPYs that
- // were not coalesced.
- addPass(MachineCopyPropagationPass());
+ // Copy propagate to forward register uses and try to eliminate COPYs that
+ // were not coalesced.
+ addPass(MachineCopyPropagationPass());
- // Run post-ra machine LICM to hoist reloads / remats.
- //
- // FIXME: can this move into MachineLateOptimization?
- addPass(MachineLICMPass());
- }
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(MachineLICMPass());
}
//===---------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 5facdfa825e4cb..abcc248334ed5f 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -188,12 +188,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
},
"filter=reg-filter;no-clear-vregs")
+// 'all' is the default filter
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"regallocgreedy", "RAGreedyPass",
[](RAGreedyPass::Options Opts) { return RAGreedyPass(Opts); },
[PB = this](StringRef Params) {
- // TODO: parseRegAllocGreedyFilterFunc(*PB, Params);
- return Expected<RAGreedyPass::Options>(RAGreedyPass::Options{});
+ return parseRegAllocGreedyFilterFunc(*PB, Params);
}, "reg-filter"
)
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index 29bdb9c1746d3c..f146cc7b8028d8 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -20,7 +20,7 @@
namespace llvm {
enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
-enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP };
+enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };
// Not one-on-one but mostly corresponding to commandline options in
// TargetPassConfig.cpp.
@@ -53,7 +53,7 @@ struct CGPassBuilderOption {
bool RequiresCodeGenSCCOrder = false;
RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
- StringRef RegAlloc = "default";
+ RegAllocType RegAlloc = RegAllocType::Unset;
std::optional<GlobalISelAbortMode> EnableGlobalISelAbort;
std::string FSProfileFile;
std::string FSRemappingFile;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index f64b6e0adb2b32..f9a4af486631eb 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1315,6 +1315,20 @@ parseBoundsCheckingOptions(StringRef Params) {
return Mode;
}
+Expected<RAGreedyPass::Options>
+parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) {
+ if (Params.empty() || Params == "all") {
+ return RAGreedyPass::Options();
+ }
+ std::optional<RegAllocFilterFunc> Filter = PB.parseRegAllocFilter(Params);
+ if (!Filter) {
+ return make_error<StringError>(
+ formatv("invalid regallocgreedy register filter '{0}' ", Params).str(),
+ inconvertibleErrorCode());
+ }
+ return RAGreedyPass::Options{*Filter, Params};
+}
+
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7256eec89008a5..952bf479827f48 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -68,6 +68,7 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
@@ -2099,6 +2100,28 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
addPass(SIShrinkInstructionsPass());
}
+static const char RegAllocNPMNotSupportedMessage[] =
+ "-regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, "
+ "-wwm-regalloc-npm, and -vgpr-regalloc-npm";
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Unset)
+ report_fatal_error(RegAllocNPMNotSupportedMessage, false);
+
+ return make_error<StringError>("not implemented yet",
+ inconvertibleErrorCode());
+}
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Unset)
+ report_fatal_error(RegAllocNPMNotSupportedMessage, false);
+
+ return make_error<StringError>("not implemented yet",
+ inconvertibleErrorCode());
+}
+
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level) const {
if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5ba58a92621edb..197476a0f80574 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -176,6 +176,8 @@ class AMDGPUCodeGenPassBuilder
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
+ Error addRegAssignmentOptimized(AddMachinePass &) const;
+ Error addRegAssignmentFast(AddMachinePass &) const;
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
index 47aa34e3c01156..a168c2891c7d6f 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s
--- |
define void @inst_stores_to_dead_spill_implicit_def_impdef() {
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
index a5d74ef75f0a0a..d9edda47638a3f 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=arm64-apple-ios -passes=regallocgreedy -o - %s | FileCheck %s
---
name: widget
diff --git a/llvm/test/CodeGen/AArch64/pr51516.mir b/llvm/test/CodeGen/AArch64/pr51516.mir
index 910bfb858b50f6..e84f0ca2015ce5 100644
--- a/llvm/test/CodeGen/AArch64/pr51516.mir
+++ b/llvm/test/CodeGen/AArch64/pr51516.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-fuchsia -passes=regallocgreedy -verify-machineinstrs -o - %s | FileCheck %s
# Check that we spill %31 and do not rematerialize it since the use operand
# of ADDXri is killed by the STRXui in this block.
diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir
index b1e7ebe3a7e82b..2773b5f19618a9 100644
--- a/llvm/test/CodeGen/AArch64/spill-fold.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fold.mir
@@ -1,5 +1,7 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64_be-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s
--- |
define i64 @test_subreg_spill_fold() { ret i64 0 }
define i64 @test_subreg_spill_fold2() { ret i64 0 }
diff --git a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
index 760ae6032230f5..42bba4d1504013 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
+++ b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s
# Initially %2 starts out with 2 subranges (one for sub0, and one for
# the rest of the lanes). After %2 is split, after refineSubRanges the
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
index 52ad7e5355207d..a54fee3a0f964c 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
@@ -12,8 +12,11 @@
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=fast -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
+; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=fast -O0 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s
+; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=basic -O3 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s
; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, and -vgpr-regalloc
+; REGALLOC-NPM: -regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, -wwm-regalloc-npm, and -vgpr-regalloc-npm
; DEFAULT: Greedy Register Allocator
; DEFAULT-NEXT: Virtual Register Rewriter
diff --git a/llvm/test/CodeGen/MIR/Generic/runPass.mir b/llvm/test/CodeGen/MIR/Generic/runPass.mir
index 75763c5389b09e..41dd98ff909b0c 100644
--- a/llvm/test/CodeGen/MIR/Generic/runPass.mir
+++ b/llvm/test/CodeGen/MIR/Generic/runPass.mir
@@ -2,6 +2,7 @@
# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -passes=regallocfast -o - %s | FileCheck %s
+# RUN: llc -passes=regallocgreedy -o - %s | FileCheck %s
# Check that passes are initialized correctly, so that it's possible to
# use -run-pass.
diff --git a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
index 197c3d8551fc38..de0db97f14bf3c 100644
--- a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
+++ b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
@@ -1,4 +1,5 @@
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -run-pass=greedy
+#RUN: llc -o - %s -mtriple=s390x-ibm-linux -passes=regallocgreedy
#PR34502. Check HoistSpill works properly after the live range of spilled
#virtual register is cleared.
--- |
diff --git a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
index 1402c7c2cbca36..e085e38ae5fe31 100644
--- a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
+++ b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
@@ -3,6 +3,7 @@
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST
+# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocgreedy %s -o - | FileCheck %s
...
---
diff --git a/llvm/test/CodeGen/X86/limit-split-cost.mir b/llvm/test/CodeGen/X86/limit-split-cost.mir
index 7ec0404e0f737c..eec8a3939151a6 100644
--- a/llvm/test/CodeGen/X86/limit-split-cost.mir
+++ b/llvm/test/CodeGen/X86/limit-split-cost.mir
@@ -1,5 +1,6 @@
# REQUIRES: asserts
# RUN: llc -mtriple=x86_64-- -run-pass=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -passes=regallocgreedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
# Check no global region split is needed because the live range to split is trivially rematerializable.
# CHECK-NOT: Compact region bundles
--- |
diff --git a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
index 07f2d350ffd9c0..66c9d8942f3da4 100644
--- a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
+++ b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
@@ -1,12 +1,17 @@
# REQUIRES: amdgpu-registered-target
-# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS
-# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER
+# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAFAST
+# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAFAST-BAD-FILTER
-# PASS: regallocfast<filter=sgpr>
-# PASS: regallocfast<filter=wwm>
-# PASS: regallocfast<filter=vgpr>
-# BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+# RUN: llc -mtriple=amdgcn -passes='regallocgreedy<sgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAGREEDY
+# RUN: not llc -mtriple=amdgcn -passes='regallocgreedy<bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAGREEDY-BAD-FILTER
+# RAFAST: regallocfast<filter=sgpr>
+# RAFAST: regallocfast<filter=wwm>
+# RAFAST: regallocfast<filter=vgpr>
+# RAFAST-BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+
+# RAGREEDY: regallocgreedy<sgpr>
+# RAGREEDY-BAD-FILTER: invalid regallocgreedy register filter 'bad-filter'
---
name: f
...
diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp
index 3892fbb8c74f78..0f7aa6284962a2 100644
--- a/llvm/tools/llc/NewPMDriver.cpp
+++ b/llvm/tools/llc/NewPMDriver.cpp
@@ -48,10 +48,17 @@
using namespace llvm;
-static cl::opt<std::string>
- RegAlloc("regalloc-npm",
- cl::desc("Register allocator to use for new pass manager"),
- cl::Hidden, cl::init("default"));
+static cl::opt<RegAllocType> RegAlloc(
+ "regalloc-npm", cl::desc("Register allocator to use for new pass manager"),
+ cl::Hidden, cl::init(RegAllocType::Unset),
+ cl::values(
+ clEnumValN(RegAllocType::Default, "default",
+ "Default register allocator"),
+ clEnumValN(RegAllocType::PBQP, "pbqp", "PBQP register allocator"),
+ clEnumValN(RegAllocType::Fast, "fast", "Fast register allocator"),
+ clEnumValN(RegAllocType::Basic, "basic", "Basic register allocator"),
+ clEnumValN(RegAllocType::Greedy, "greedy",
+ "Greedy register allocator")));
static cl::opt<bool>
DebugPM("debug-pass-manager", cl::Hidden,
|
@llvm/pr-subscribers-backend-systemz Author: Akshat Oke (optimisan) ChangesUse
Full diff: https://github.com/llvm/llvm-project/pull/120557.diff 18 Files Affected:
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 28768a72c83fa3..2dbcd7122f6aa8 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1059,7 +1059,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization(
///
/// A target that uses the standard regalloc pass order for fast or optimized
/// allocation may still override this for per-target regalloc
-/// selection. But -regalloc=... always takes precedence.
+/// selection. But -regalloc-npm=... always takes precedence.
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
AddMachinePass &addPass, bool Optimized) const {
@@ -1075,7 +1075,22 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
AddMachinePass &addPass, bool Optimized) const {
- // TODO: Parse Opt.RegAlloc to add register allocator.
+ // Use the specified -regalloc-npm={basic|greedy|fast|pbqp}
+ if (Opt.RegAlloc > RegAllocType::Default) {
+ switch (Opt.RegAlloc) {
+ case RegAllocType::Fast:
+ addPass(RegAllocFastPass());
+ break;
+ case RegAllocType::Greedy:
+ addPass(RAGreedyPass());
+ break;
+ default:
+ report_fatal_error("Register allocator not supported yet.", false);
+ }
+ return;
+ }
+ // -regalloc=default or unspecified, so pick based on the optimization level.
+ derived().addTargetRegisterAllocator(addPass, Optimized);
}
template <typename Derived, typename TargetMachineT>
@@ -1146,20 +1161,23 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
// PreRA instruction scheduling.
addPass(MachineSchedulerPass());
- if (derived().addRegAssignmentOptimized(addPass)) {
- // Allow targets to expand pseudo instructions depending on the choice of
- // registers before MachineCopyPropagation.
- derived().addPostRewrite(addPass);
+ if (auto E = derived().addRegAssignmentOptimized(addPass)) {
+ // addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
+ // FIXME: This is not really an error.
+ return;
+ }
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ derived().addPostRewrite(addPass);
- // Copy propagate to forward register uses and try to eliminate COPYs that
- // were not coalesced.
- addPass(MachineCopyPropagationPass());
+ // Copy propagate to forward register uses and try to eliminate COPYs that
+ // were not coalesced.
+ addPass(MachineCopyPropagationPass());
- // Run post-ra machine LICM to hoist reloads / remats.
- //
- // FIXME: can this move into MachineLateOptimization?
- addPass(MachineLICMPass());
- }
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(MachineLICMPass());
}
//===---------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 5facdfa825e4cb..abcc248334ed5f 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -188,12 +188,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
},
"filter=reg-filter;no-clear-vregs")
+// 'all' is the default filter
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"regallocgreedy", "RAGreedyPass",
[](RAGreedyPass::Options Opts) { return RAGreedyPass(Opts); },
[PB = this](StringRef Params) {
- // TODO: parseRegAllocGreedyFilterFunc(*PB, Params);
- return Expected<RAGreedyPass::Options>(RAGreedyPass::Options{});
+ return parseRegAllocGreedyFilterFunc(*PB, Params);
}, "reg-filter"
)
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index 29bdb9c1746d3c..f146cc7b8028d8 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -20,7 +20,7 @@
namespace llvm {
enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
-enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP };
+enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };
// Not one-on-one but mostly corresponding to commandline options in
// TargetPassConfig.cpp.
@@ -53,7 +53,7 @@ struct CGPassBuilderOption {
bool RequiresCodeGenSCCOrder = false;
RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
- StringRef RegAlloc = "default";
+ RegAllocType RegAlloc = RegAllocType::Unset;
std::optional<GlobalISelAbortMode> EnableGlobalISelAbort;
std::string FSProfileFile;
std::string FSRemappingFile;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index f64b6e0adb2b32..f9a4af486631eb 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1315,6 +1315,20 @@ parseBoundsCheckingOptions(StringRef Params) {
return Mode;
}
+Expected<RAGreedyPass::Options>
+parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) {
+ if (Params.empty() || Params == "all") {
+ return RAGreedyPass::Options();
+ }
+ std::optional<RegAllocFilterFunc> Filter = PB.parseRegAllocFilter(Params);
+ if (!Filter) {
+ return make_error<StringError>(
+ formatv("invalid regallocgreedy register filter '{0}' ", Params).str(),
+ inconvertibleErrorCode());
+ }
+ return RAGreedyPass::Options{*Filter, Params};
+}
+
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7256eec89008a5..952bf479827f48 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -68,6 +68,7 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
@@ -2099,6 +2100,28 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
addPass(SIShrinkInstructionsPass());
}
+static const char RegAllocNPMNotSupportedMessage[] =
+ "-regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, "
+ "-wwm-regalloc-npm, and -vgpr-regalloc-npm";
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Unset)
+ report_fatal_error(RegAllocNPMNotSupportedMessage, false);
+
+ return make_error<StringError>("not implemented yet",
+ inconvertibleErrorCode());
+}
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Unset)
+ report_fatal_error(RegAllocNPMNotSupportedMessage, false);
+
+ return make_error<StringError>("not implemented yet",
+ inconvertibleErrorCode());
+}
+
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level) const {
if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5ba58a92621edb..197476a0f80574 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -176,6 +176,8 @@ class AMDGPUCodeGenPassBuilder
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
+ Error addRegAssignmentOptimized(AddMachinePass &) const;
+ Error addRegAssignmentFast(AddMachinePass &) const;
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
index 47aa34e3c01156..a168c2891c7d6f 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s
--- |
define void @inst_stores_to_dead_spill_implicit_def_impdef() {
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
index a5d74ef75f0a0a..d9edda47638a3f 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=arm64-apple-ios -passes=regallocgreedy -o - %s | FileCheck %s
---
name: widget
diff --git a/llvm/test/CodeGen/AArch64/pr51516.mir b/llvm/test/CodeGen/AArch64/pr51516.mir
index 910bfb858b50f6..e84f0ca2015ce5 100644
--- a/llvm/test/CodeGen/AArch64/pr51516.mir
+++ b/llvm/test/CodeGen/AArch64/pr51516.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-fuchsia -passes=regallocgreedy -verify-machineinstrs -o - %s | FileCheck %s
# Check that we spill %31 and do not rematerialize it since the use operand
# of ADDXri is killed by the STRXui in this block.
diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir
index b1e7ebe3a7e82b..2773b5f19618a9 100644
--- a/llvm/test/CodeGen/AArch64/spill-fold.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fold.mir
@@ -1,5 +1,7 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64_be-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s
--- |
define i64 @test_subreg_spill_fold() { ret i64 0 }
define i64 @test_subreg_spill_fold2() { ret i64 0 }
diff --git a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
index 760ae6032230f5..42bba4d1504013 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
+++ b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s
# Initially %2 starts out with 2 subranges (one for sub0, and one for
# the rest of the lanes). After %2 is split, after refineSubRanges the
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
index 52ad7e5355207d..a54fee3a0f964c 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
@@ -12,8 +12,11 @@
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=fast -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
+; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=fast -O0 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s
+; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=basic -O3 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s
; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, and -vgpr-regalloc
+; REGALLOC-NPM: -regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, -wwm-regalloc-npm, and -vgpr-regalloc-npm
; DEFAULT: Greedy Register Allocator
; DEFAULT-NEXT: Virtual Register Rewriter
diff --git a/llvm/test/CodeGen/MIR/Generic/runPass.mir b/llvm/test/CodeGen/MIR/Generic/runPass.mir
index 75763c5389b09e..41dd98ff909b0c 100644
--- a/llvm/test/CodeGen/MIR/Generic/runPass.mir
+++ b/llvm/test/CodeGen/MIR/Generic/runPass.mir
@@ -2,6 +2,7 @@
# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -passes=regallocfast -o - %s | FileCheck %s
+# RUN: llc -passes=regallocgreedy -o - %s | FileCheck %s
# Check that passes are initialized correctly, so that it's possible to
# use -run-pass.
diff --git a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
index 197c3d8551fc38..de0db97f14bf3c 100644
--- a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
+++ b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
@@ -1,4 +1,5 @@
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -run-pass=greedy
+#RUN: llc -o - %s -mtriple=s390x-ibm-linux -passes=regallocgreedy
#PR34502. Check HoistSpill works properly after the live range of spilled
#virtual register is cleared.
--- |
diff --git a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
index 1402c7c2cbca36..e085e38ae5fe31 100644
--- a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
+++ b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
@@ -3,6 +3,7 @@
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST
+# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocgreedy %s -o - | FileCheck %s
...
---
diff --git a/llvm/test/CodeGen/X86/limit-split-cost.mir b/llvm/test/CodeGen/X86/limit-split-cost.mir
index 7ec0404e0f737c..eec8a3939151a6 100644
--- a/llvm/test/CodeGen/X86/limit-split-cost.mir
+++ b/llvm/test/CodeGen/X86/limit-split-cost.mir
@@ -1,5 +1,6 @@
# REQUIRES: asserts
# RUN: llc -mtriple=x86_64-- -run-pass=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -passes=regallocgreedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
# Check no global region split is needed because the live range to split is trivially rematerializable.
# CHECK-NOT: Compact region bundles
--- |
diff --git a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
index 07f2d350ffd9c0..66c9d8942f3da4 100644
--- a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
+++ b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
@@ -1,12 +1,17 @@
# REQUIRES: amdgpu-registered-target
-# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS
-# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER
+# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAFAST
+# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAFAST-BAD-FILTER
-# PASS: regallocfast<filter=sgpr>
-# PASS: regallocfast<filter=wwm>
-# PASS: regallocfast<filter=vgpr>
-# BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+# RUN: llc -mtriple=amdgcn -passes='regallocgreedy<sgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAGREEDY
+# RUN: not llc -mtriple=amdgcn -passes='regallocgreedy<bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAGREEDY-BAD-FILTER
+# RAFAST: regallocfast<filter=sgpr>
+# RAFAST: regallocfast<filter=wwm>
+# RAFAST: regallocfast<filter=vgpr>
+# RAFAST-BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+
+# RAGREEDY: regallocgreedy<sgpr>
+# RAGREEDY-BAD-FILTER: invalid regallocgreedy register filter 'bad-filter'
---
name: f
...
diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp
index 3892fbb8c74f78..0f7aa6284962a2 100644
--- a/llvm/tools/llc/NewPMDriver.cpp
+++ b/llvm/tools/llc/NewPMDriver.cpp
@@ -48,10 +48,17 @@
using namespace llvm;
-static cl::opt<std::string>
- RegAlloc("regalloc-npm",
- cl::desc("Register allocator to use for new pass manager"),
- cl::Hidden, cl::init("default"));
+static cl::opt<RegAllocType> RegAlloc(
+ "regalloc-npm", cl::desc("Register allocator to use for new pass manager"),
+ cl::Hidden, cl::init(RegAllocType::Unset),
+ cl::values(
+ clEnumValN(RegAllocType::Default, "default",
+ "Default register allocator"),
+ clEnumValN(RegAllocType::PBQP, "pbqp", "PBQP register allocator"),
+ clEnumValN(RegAllocType::Fast, "fast", "Fast register allocator"),
+ clEnumValN(RegAllocType::Basic, "basic", "Basic register allocator"),
+ clEnumValN(RegAllocType::Greedy, "greedy",
+ "Greedy register allocator")));
static cl::opt<bool>
DebugPM("debug-pass-manager", cl::Hidden,
|
Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized( | ||
AddMachinePass &addPass) const { | ||
if (Opt.RegAlloc != RegAllocType::Unset) | ||
report_fatal_error(RegAllocNPMNotSupportedMessage, false); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why mix fatal errors and returned errors?
516482f
to
7f36763
Compare
5c2a6b2
to
513da6b
Compare
7f36763
to
686d1cd
Compare
513da6b
to
9ec74b7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with a nit.
@@ -193,12 +193,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( | |||
}, | |||
"filter=reg-filter;no-clear-vregs") | |||
|
|||
// 'all' is the default filter |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// 'all' is the default filter | |
// 'all' is the default filter. |
2224d60
to
c2770f6
Compare
ed7b986
to
3f237ec
Compare
ca1317d
to
88c92e8
Compare
3f237ec
to
1e1e4a7
Compare
1e1e4a7
to
9b4d0f5
Compare
22b89d2
to
082ff42
Compare
9b4d0f5
to
e0ecd0f
Compare
082ff42
to
8441dc6
Compare
This will allow -sgpr-regalloc-npm=greedy to reuse the same parser.
e0ecd0f
to
302b9f0
Compare
Hi, @paperchalice I'd like to merge this for time being so O3 pipeline can be enabled very soon. |
@@ -0,0 +1,6 @@ | |||
# REQUIRES x86_64-registered-target | |||
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-new-pm -O3 -regalloc-npm=fast -print-pipeline-passes %s 2>&1 | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this RUN line (and the one on the next line) will write to the local directory since it's lacking any "-o" flag, right?
If we don't care about the output, can we please add "-o /dev/null" like in "limit-split-cost.mir" above?
The current directory may be write protected.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
-filetype=null should be use to discard llc output
Use `-passes="regallocgreedy<[all|sgpr|wwm|vgpr]>` to insert the greedy RA with a filter and `-regalloc-npm=<type>` to control which RA to use in existing pipeline.
Use
-passes="regallocgreedy<[all|sgpr|wwm|vgpr]>
to insert the greedy RA with a filter and-regalloc-npm=<type>
to control which RA to use in existing pipeline.-regalloc-npm=<type>
is not for AMDGPU. Adding options likesgpr-regalloc-npm
for it next.