Skip to content

[RegAlloc][NewPM] Plug Greedy RA in codegen pipeline #120557

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 37 additions & 14 deletions llvm/include/llvm/Passes/CodeGenPassBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1063,7 +1063,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization(
///
/// A target that uses the standard regalloc pass order for fast or optimized
/// allocation may still override this for per-target regalloc
/// selection. But -regalloc=... always takes precedence.
/// selection. But -regalloc-npm=... always takes precedence.
/// If a target does not want to allow users to set -regalloc-npm=... at all,
/// check if Opt.RegAlloc == RegAllocType::Unset.
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
AddMachinePass &addPass, bool Optimized) const {
Expand All @@ -1076,10 +1078,29 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
/// Find and instantiate the register allocation pass requested by this target
/// at the current optimization level. Different register allocators are
/// defined as separate passes because they may require different analysis.
///
/// This helper ensures that the -regalloc-npm= option is always available,
/// even for targets that override the default allocator.
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
    AddMachinePass &addPass, bool Optimized) const {
  // Nothing above RegAllocType::Default was requested via -regalloc-npm=...,
  // so let the target (or the optimization level) choose the allocator.
  if (Opt.RegAlloc <= RegAllocType::Default) {
    derived().addTargetRegisterAllocator(addPass, Optimized);
    return;
  }

  // An explicit -regalloc-npm={basic|greedy|fast|pbqp} selection wins over
  // whatever the target would have picked.
  switch (Opt.RegAlloc) {
  case RegAllocType::Fast:
    addPass(RegAllocFastPass());
    return;
  case RegAllocType::Greedy:
    addPass(RAGreedyPass());
    return;
  default:
    // Remaining allocator kinds have no case here yet.
    report_fatal_error("register allocator not supported yet", false);
  }
}

template <typename Derived, typename TargetMachineT>
Expand Down Expand Up @@ -1150,20 +1171,22 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
// PreRA instruction scheduling.
addPass(MachineSchedulerPass(&TM));

if (derived().addRegAssignmentOptimized(addPass)) {
// Allow targets to expand pseudo instructions depending on the choice of
// registers before MachineCopyPropagation.
derived().addPostRewrite(addPass);
if (auto E = derived().addRegAssignmentOptimized(addPass)) {
// addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
return;
}
// Allow targets to expand pseudo instructions depending on the choice of
// registers before MachineCopyPropagation.
derived().addPostRewrite(addPass);

// Copy propagate to forward register uses and try to eliminate COPYs that
// were not coalesced.
addPass(MachineCopyPropagationPass());
// Copy propagate to forward register uses and try to eliminate COPYs that
// were not coalesced.
addPass(MachineCopyPropagationPass());

// Run post-ra machine LICM to hoist reloads / remats.
//
// FIXME: can this move into MachineLateOptimization?
addPass(MachineLICMPass());
}
// Run post-ra machine LICM to hoist reloads / remats.
//
// FIXME: can this move into MachineLateOptimization?
addPass(MachineLICMPass());
}

//===---------------------------------------------------------------------===//
Expand Down
5 changes: 2 additions & 3 deletions llvm/include/llvm/Passes/MachinePassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -196,13 +196,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
},
"filter=reg-filter;no-clear-vregs")

// 'all' is the default filter.
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"greedy", "RAGreedyPass",
[](RAGreedyPass::Options Opts) { return RAGreedyPass(Opts); },
[PB = this](StringRef Params) {
// TODO: parseRegAllocGreedyFilterFunc(*PB, Params);
(void)PB;
return Expected<RAGreedyPass::Options>(RAGreedyPass::Options{});
return parseRegAllocGreedyFilterFunc(*PB, Params);
}, "reg-filter"
)
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
Expand Down
20 changes: 18 additions & 2 deletions llvm/include/llvm/Target/CGPassBuilderOption.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,29 @@
#ifndef LLVM_TARGET_CGPASSBUILDEROPTION_H
#define LLVM_TARGET_CGPASSBUILDEROPTION_H

#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

namespace llvm {

enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP };
enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };

/// Command-line parser for RegAllocType values. It registers one literal
/// option name per allocator kind; RegAllocType::Unset gets no literal.
class RegAllocTypeParser : public cl::parser<RegAllocType> {
  using BaseParser = cl::parser<RegAllocType>;

public:
  RegAllocTypeParser(cl::Option &O) : BaseParser(O) {}

  /// Runs the base parser's initialization and then registers the accepted
  /// spellings, in the order listed in the table below.
  void initialize() {
    BaseParser::initialize();

    struct Choice {
      const char *Name;
      RegAllocType Kind;
      const char *Help;
    };
    static constexpr Choice Choices[] = {
        {"default", RegAllocType::Default, "Default register allocator"},
        {"pbqp", RegAllocType::PBQP, "PBQP register allocator"},
        {"fast", RegAllocType::Fast, "Fast register allocator"},
        {"basic", RegAllocType::Basic, "Basic register allocator"},
        {"greedy", RegAllocType::Greedy, "Greedy register allocator"},
    };

    for (const Choice &C : Choices)
      addLiteralOption(C.Name, C.Kind, C.Help);
  }
};

// Not one-on-one but mostly corresponding to commandline options in
// TargetPassConfig.cpp.
Expand Down Expand Up @@ -52,7 +68,7 @@ struct CGPassBuilderOption {
bool RequiresCodeGenSCCOrder = false;

RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
StringRef RegAlloc = "default";
RegAllocType RegAlloc = RegAllocType::Unset;
std::optional<GlobalISelAbortMode> EnableGlobalISelAbort;
std::string FSProfileFile;
std::string FSRemappingFile;
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1415,6 +1415,20 @@ parseBoundsCheckingOptions(StringRef Params) {
return Options;
}

/// Turns the parameter string of the "greedy" pass into RAGreedyPass options.
///
/// An empty string or "all" yields default-constructed options. Any other
/// string is handed to PassBuilder::parseRegAllocFilter; if that yields a
/// filter, the options are built from the filter and the original string,
/// otherwise a StringError describing the rejected string is returned.
Expected<RAGreedyPass::Options>
parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) {
  const bool WantsDefaultFilter = Params.empty() || Params == "all";
  if (WantsDefaultFilter)
    return RAGreedyPass::Options();

  std::optional<RegAllocFilterFunc> ParsedFilter =
      PB.parseRegAllocFilter(Params);
  if (!ParsedFilter)
    return make_error<StringError>(
        formatv("invalid regallocgreedy register filter '{0}' ", Params).str(),
        inconvertibleErrorCode());

  return RAGreedyPass::Options{*ParsedFilter, Params};
}

} // namespace

/// Tests whether a pass name starts with a valid prefix for a default pipeline
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -passes=greedy -o - %s | FileCheck %s

--- |
define void @inst_stores_to_dead_spill_implicit_def_impdef() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s
# RUN: llc -mtriple=arm64-apple-ios -passes=greedy -o - %s | FileCheck %s

---
name: widget
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/pr51516.mir
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=aarch64-unknown-fuchsia -passes=greedy -verify-machineinstrs -o - %s | FileCheck %s

# Check that we spill %31 and do not rematerialize it since the use operand
# of ADDXri is killed by the STRXui in this block.
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AArch64/spill-fold.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=greedy -o - %s | FileCheck %s
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -passes=greedy -o - %s | FileCheck %s
--- |
define i64 @test_subreg_spill_fold() { ret i64 0 }
define i64 @test_subreg_spill_fold2() { ret i64 0 }
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/MIR/Generic/runPass.mir
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -passes=regallocfast -o - %s | FileCheck %s
# RUN: llc -passes=greedy -o - %s | FileCheck %s

# Check that passes are initialized correctly, so that it's possible to
# use -run-pass.
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -run-pass=greedy
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -passes=greedy
#PR34502. Check HoistSpill works properly after the live range of spilled
#virtual register is cleared.
--- |
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/Thumb/high-reg-clobber.mir
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=greedy %s -o - | FileCheck %s

...
---
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/limit-split-cost.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# REQUIRES: asserts
# RUN: llc -mtriple=x86_64-- -run-pass=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
# RUN: llc -mtriple=x86_64-- -passes=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
# Check no global region split is needed because the live range to split is trivially rematerializable.
# CHECK-NOT: Compact region bundles
--- |
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/tools/llc/new-pm/x86_64-regalloc-pipeline.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# REQUIRES: x86-registered-target
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-new-pm -O3 -regalloc-npm=fast -print-pipeline-passes %s 2>&1 | FileCheck %s
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this RUN line (and the one on the next line) will write to the local directory since it's lacking any "-o" flag, right?
If we don't care about the output, can we please add "-o /dev/null" like in "limit-split-cost.mir" above?
The current directory may be write protected.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-filetype=null should be used to discard llc output

# RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-new-pm -O3 -regalloc-npm=greedy -print-pipeline-passes %s 2>&1 | FileCheck %s --check-prefix=CHECK-GREEDY

# CHECK: regallocfast
# CHECK-GREEDY: greedy<all>
4 changes: 2 additions & 2 deletions llvm/tools/llc/NewPMDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@

using namespace llvm;

static cl::opt<std::string>
static cl::opt<RegAllocType, false, RegAllocTypeParser>
RegAlloc("regalloc-npm",
cl::desc("Register allocator to use for new pass manager"),
cl::Hidden, cl::init("default"));
cl::Hidden, cl::init(RegAllocType::Unset));

static cl::opt<bool>
DebugPM("debug-pass-manager", cl::Hidden,
Expand Down
Loading