Skip to content

[CodeGen][NewPM] Handle --regalloc-npm option #94748

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 47 additions & 47 deletions llvm/include/llvm/Passes/CodeGenPassBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,12 @@
#include "llvm/IRPrinter/IRPrintingPasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/CFGuard.h"
Expand Down Expand Up @@ -116,9 +118,8 @@ namespace llvm {
template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
public:
explicit CodeGenPassBuilder(TargetMachineT &TM,
const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC)
: TM(TM), Opt(Opts), PIC(PIC) {
const CGPassBuilderOption &Opts, PassBuilder &PB)
: TM(TM), Opt(Opts), PB(PB), PIC(PB.getPassInstrumentationCallbacks()) {
// Target could set CGPassBuilderOption::MISchedPostRA to true to achieve
// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID)

Expand Down Expand Up @@ -253,6 +254,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {

TargetMachineT &TM;
CGPassBuilderOption Opt;
PassBuilder &PB;
PassInstrumentationCallbacks *PIC;

template <typename TMC> TMC &getTM() const { return static_cast<TMC &>(TM); }
Expand Down Expand Up @@ -431,7 +433,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {

/// addOptimizedRegAlloc - Add passes related to register allocation.
/// LLVMTargetMachine provides standard regalloc passes for most targets.
void addOptimizedRegAlloc(AddMachinePass &) const;
Error addOptimizedRegAlloc(AddMachinePass &) const;

/// Add passes that optimize machine instructions after register allocation.
void addMachineLateOptimization(AddMachinePass &) const;
Expand All @@ -453,13 +455,9 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
/// Utilities for targets to add passes to the pass manager.
///

/// createTargetRegisterAllocator - Create the register allocator pass for
/// this target at the current optimization level.
void addTargetRegisterAllocator(AddMachinePass &, bool Optimized) const;

/// addMachinePasses helper to create the target-selected or overridden
/// regalloc pass.
void addRegAllocPass(AddMachinePass &, bool Optimized) const;
Error addRegAllocPass(AddMachinePass &, StringRef FilterName = "all") const;

/// Add core register allocator passes which do the actual register assignment
/// and rewriting. \returns true if any passes were added.
Expand Down Expand Up @@ -521,6 +519,9 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::buildPipeline(
return StartStopInfo.takeError();
setStartStopPasses(*StartStopInfo);

if (auto Err = PB.parseRegAllocOpt(Opt.RegAlloc))
return Err;

bool PrintAsm = TargetPassConfig::willCompleteCodeGenPipeline();
bool PrintMIR = !PrintAsm && FileType != CodeGenFileType::Null;

Expand Down Expand Up @@ -895,7 +896,8 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addMachinePasses(
// Run register allocation and passes that are tightly coupled with it,
// including phi elimination and scheduling.
if (*Opt.OptimizeRegAlloc) {
derived().addOptimizedRegAlloc(addPass);
if (auto Err = derived().addOptimizedRegAlloc(addPass))
return Err;
} else {
if (auto Err = derived().addFastRegAlloc(addPass))
return Err;
Expand Down Expand Up @@ -1025,45 +1027,40 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization(
/// Register Allocation Pass Configuration
//===---------------------------------------------------------------------===//

/// Instantiate the default register allocator pass for this target for either
/// the optimized or unoptimized allocation path. This will be added to the pass
/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
/// in the optimized case.
///
/// A target that uses the standard regalloc pass order for fast or optimized
/// allocation may still override this for per-target regalloc
/// selection. But -regalloc=... always takes precedence.
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
AddMachinePass &addPass, bool Optimized) const {
if (Optimized)
addPass(RAGreedyPass());
else
addPass(RegAllocFastPass());
}

/// Find and instantiate the register allocation pass requested by this target
/// at the current optimization level. Different register allocators are
/// defined as separate passes because they may require different analysis.
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
AddMachinePass &addPass, bool Optimized) const {
// TODO: Parse Opt.RegAlloc to add register allocator.
Error CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
AddMachinePass &addPass, StringRef FilterName) const {
auto &RegAllocMap = PB.getRegAllocMap();
if (RegAllocMap.contains("none"))
return Error::success();

if (!RegAllocMap.contains(FilterName)) {
return make_error<StringError>(
formatv("No register allocator for register class filter '{0}'",
FilterName)
.str(),
inconvertibleErrorCode());
}

addPass(std::move(RegAllocMap[FilterName]));
return Error::success();
}

template <typename Derived, typename TargetMachineT>
Error CodeGenPassBuilder<Derived, TargetMachineT>::addRegAssignmentFast(
AddMachinePass &addPass) const {
// TODO: Ensure allocator is default or fast.
addRegAllocPass(addPass, false);
return Error::success();
return addRegAllocPass(addPass);
}

template <typename Derived, typename TargetMachineT>
Error CodeGenPassBuilder<Derived, TargetMachineT>::addRegAssignmentOptimized(
AddMachinePass &addPass) const {
// Add the selected register allocation pass.
addRegAllocPass(addPass, true);
if (auto Err = addRegAllocPass(addPass))
return Err;

// Allow targets to change the register assignments before rewriting.
derived().addPreRewrite(addPass);
Expand Down Expand Up @@ -1093,7 +1090,7 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addFastRegAlloc(
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
Error CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
AddMachinePass &addPass) const {
addPass(DetectDeadLanesPass());

Expand All @@ -1119,20 +1116,23 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
// PreRA instruction scheduling.
addPass(MachineSchedulerPass());

if (derived().addRegAssignmentOptimized(addPass)) {
// Allow targets to expand pseudo instructions depending on the choice of
// registers before MachineCopyPropagation.
derived().addPostRewrite(addPass);
if (auto Err = derived().addRegAssignmentOptimized(addPass))
return Err;

// Copy propagate to forward register uses and try to eliminate COPYs that
// were not coalesced.
addPass(MachineCopyPropagationPass());
// Allow targets to expand pseudo instructions depending on the choice of
// registers before MachineCopyPropagation.
derived().addPostRewrite(addPass);

// Run post-ra machine LICM to hoist reloads / remats.
//
// FIXME: can this move into MachineLateOptimization?
addPass(MachineLICMPass());
}
// Copy propagate to forward register uses and try to eliminate COPYs that
// were not coalesced.
addPass(MachineCopyPropagationPass());

// Run post-ra machine LICM to hoist reloads / remats.
//
// FIXME: can this move into MachineLateOptimization?
addPass(MachineLICMPass());

return Error::success();
}

//===---------------------------------------------------------------------===//
Expand Down
21 changes: 20 additions & 1 deletion llvm/include/llvm/Passes/PassBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class PassBuilder {
PipelineTuningOptions PTO;
std::optional<PGOOptions> PGOOpt;
PassInstrumentationCallbacks *PIC;
StringMap<MachineFunctionPassManager> RegAllocMap;

public:
/// A struct to capture parsed pass pipeline names.
Expand Down Expand Up @@ -582,12 +583,28 @@ class PassBuilder {

/// Register callbacks to parse target specific filter field if regalloc pass
/// needs it. E.g. AMDGPU requires regalloc passes that can handle sgpr and vgpr
/// separately.
/// separately. Currently "all" and "none" are reserved filter names.
void registerRegClassFilterParsingCallback(
    const std::function<RegClassFilterFunc(StringRef)> &C) {
  // Keep our own copy of the callback at the end of the list.
  RegClassFilterParsingCallbacks.emplace_back(C);
}

/// Parse command line option `--regalloc-npm`
/// Should only be called by CodeGenPassBuilder.
Error parseRegAllocOpt(StringRef Text);

/// Target hook to set the default register allocator builder.
///
/// \p C is copied into DefaultRegAllocBuilder, replacing whatever builder was
/// stored before. The builder receives the filter-name -> pass-manager map
/// and is expected to fill it in; parseRegAllocOpt invokes it when that map
/// is still empty.
void setDefaultRegAllocBuilder(
const std::function<void(StringMap<MachineFunctionPassManager> &)> &C) {
DefaultRegAllocBuilder = C;
}

/// Used by CodeGenPassBuilder to add correct regalloc pass.
/// Should only be called by CodeGenPassBuilder.
StringMap<MachineFunctionPassManager> &getRegAllocMap() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems very roundabout to use a map that creates a MachineFunctionPassManager. Can this instead be a callback that takes a MachineFunctionPassManager and adds a regalloc pass to it, like the optimization pipeline callbacks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The requirements that backends place on register allocation passes are somewhat complex. Some targets (DirectX, NVPTX, etc.) must not contain them at all. AMDGPU needs register allocators constructed with the correct parameters when the pass supports them; here, that parameter is the register class filter. Callbacks may not be suitable for this situation, because a callback may add unexpected passes. WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do you mean callbacks may add unexpected passes?

Basically, my suggestion is to replace std::function<void(StringMap<MachineFunctionPassManager> &)> with std::function<void(MachineFunctionPassManager &, StringRef, bool)>, where the StringRef is the filter and the bool says whether regalloc is optimized (or maybe that is part of the filter; structure it however makes the most sense). The default callback adds the fast or greedy register allocator based on the IsOptimized bool. AMDGPU can set the callback to add its custom regallocs based on the filter, and backends that don't need a regalloc can override the callback to be empty.

does that make sense? or is there a benefit of the StringMap over that approach?

return RegAllocMap;
}

/// Register a callback for a top-level pipeline entry.
///
/// If the PassManager type is not given at the top level of the pipeline
Expand Down Expand Up @@ -807,6 +824,8 @@ class PassBuilder {
// Callbacks to parse `filter` parameter in register allocation passes
SmallVector<std::function<RegClassFilterFunc(StringRef)>, 2>
RegClassFilterParsingCallbacks;
std::function<void(StringMap<MachineFunctionPassManager> &)>
DefaultRegAllocBuilder;
};

/// This utility template takes care of adding require<> and invalidate<>
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/Target/TargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ class LLVMTargetMachine : public TargetMachine {
virtual Error buildCodeGenPipeline(ModulePassManager &, raw_pwrite_stream &,
raw_pwrite_stream *, CodeGenFileType,
const CGPassBuilderOption &,
PassInstrumentationCallbacks *) {
PassBuilder &) {
return make_error<StringError>("buildCodeGenPipeline is not overridden",
inconvertibleErrorCode());
}
Expand Down
74 changes: 73 additions & 1 deletion llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,13 +409,33 @@ class RequireAllMachineFunctionPropertiesPass

} // namespace

static void defaultRegAllocBuilder(TargetMachine *TM,
StringMap<MachineFunctionPassManager> &M) {
if (!TM)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should always be non-null here

return;

MachineFunctionPassManager MFPM;
auto Opts = getCGPassBuilderOption();
if (Opts.OptimizeRegAlloc.value_or(TM->getOptLevel() !=
CodeGenOptLevel::None)) {
// TODO: Add greedy register allocator.
} else {
MFPM.addPass(RegAllocFastPass());
}
M["all"] = std::move(MFPM);
}

PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
std::optional<PGOOptions> PGOOpt,
PassInstrumentationCallbacks *PIC)
: TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) {
bool ShouldPopulateClassToPassNames = PIC && shouldPopulateClassToPassNames();
if (TM)
if (TM) {
DefaultRegAllocBuilder = [TM](StringMap<MachineFunctionPassManager> &M) {
defaultRegAllocBuilder(TM, M);
};
TM->registerPassBuilderCallbacks(*this, ShouldPopulateClassToPassNames);
}
if (ShouldPopulateClassToPassNames) {
#define MODULE_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
Expand Down Expand Up @@ -2219,6 +2239,19 @@ Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
return Error::success();
}

/// Return the value of the `filter=` entry in the parameter list of a
/// parametrized pass name (the `;`-separated text after the first '<'), or
/// "all" when the name has no parameter list or no such entry.
static StringRef getFilterName(StringRef PassName) {
  // Everything from the first '<' onwards; empty if the name has no '<'.
  StringRef Rest = PassName.drop_until([](char C) { return C == '<'; });
  if (Rest.empty())
    return "all";

  // Strip the opening '<' and the final character (expected to be '>').
  Rest = Rest.drop_front().drop_back();
  while (!Rest.empty()) {
    std::pair<StringRef, StringRef> Split = Rest.split(';');
    Rest = Split.second;
    if (Split.first.consume_front("filter="))
      return Split.first;
  }
  return "all";
}

RegClassFilterFunc PassBuilder::parseRegAllocFilter(StringRef FilterName) {
if (FilterName == "all")
return allocateAllRegClasses;
Expand All @@ -2228,6 +2261,45 @@ RegClassFilterFunc PassBuilder::parseRegAllocFilter(StringRef FilterName) {
return nullptr;
}

/// Parse the text of the `--regalloc-npm` option and populate RegAllocMap,
/// which maps a register class filter name to the pass manager holding the
/// allocator for that filter.
///
/// If the map is still empty it is first seeded by the default builder.
/// "default" keeps the seeded entries (installing an empty "all" entry when
/// the target registered "none"). Any other \p Text is treated as a
/// comma-separated list of machine-function pass pipelines; each one is parsed
/// and stored under the value of its `filter=` parameter ("all" if absent).
///
/// \returns an error if an allocator is requested although the target
/// registered "none", if a non-fast allocator is requested for unoptimized
/// register allocation, or if a pipeline fails to parse.
Error PassBuilder::parseRegAllocOpt(StringRef Text) {
  assert(TM && "Need target machine to parse this option!");
  // The builder may legitimately be unset; calling an empty std::function is
  // fatal, so leave the map empty and let the caller report the missing
  // allocator instead.
  if (RegAllocMap.empty() && DefaultRegAllocBuilder)
    DefaultRegAllocBuilder(RegAllocMap);

  if (Text == "default") {
    // Add nothing when target inserts "none" into the map.
    if (RegAllocMap.contains("none"))
      RegAllocMap["all"] = MachineFunctionPassManager();
    return Error::success();
  }

  if (RegAllocMap.contains("none")) {
    return make_error<StringError>(
        "Target doesn't support register allocation!",
        inconvertibleErrorCode());
  }

  // Decide optimized vs. unoptimized the same way defaultRegAllocBuilder does:
  // an explicit OptimizeRegAlloc setting wins over the optimization level.
  // Otherwise the fast-allocator restriction below would be applied to the
  // wrong regalloc path.
  const bool IsOptimized = getCGPassBuilderOption().OptimizeRegAlloc.value_or(
      TM->getOptLevel() != CodeGenOptLevel::None);
  while (!Text.empty()) {
    StringRef PassName;
    std::tie(PassName, Text) = Text.split(',');
    if (!IsOptimized &&
        !PassBuilder::checkParametrizedPassName(PassName, "regallocfast")) {
      return make_error<StringError>(
          "Must use fast (default) register allocator for "
          "unoptimized regalloc.",
          inconvertibleErrorCode());
    }
    // FIXME: Should only accept reg-alloc passes.
    // A fresh pass manager per entry; it is moved into the map once parsed.
    MachineFunctionPassManager MFPM;
    if (auto Err = parsePassPipeline(MFPM, PassName))
      return Err;
    RegAllocMap[getFilterName(PassName)] = std::move(MFPM);
  }
  return Error::success();
}

/// Write \p PassName to \p OS on a line of its own, preceded by a space.
static void printPassName(StringRef PassName, raw_ostream &OS) {
  OS << " ";
  OS << PassName;
  OS << "\n";
}
Expand Down
21 changes: 18 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@
using namespace llvm;

AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
AMDGPUTargetMachine &TM, const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC)
: CodeGenPassBuilder(TM, Opts, PIC) {
AMDGPUTargetMachine &TM, const CGPassBuilderOption &Opts, PassBuilder &PB)
: CodeGenPassBuilder(TM, Opts, PB) {
Opt.RequiresCodeGenSCCOrder = true;
// Exceptions and StackMaps are not supported, so these passes will never do
// anything.
Expand All @@ -40,3 +39,19 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
addPass(AMDGPUISelDAGToDAGPass(TM));
return Error::success();
}

/// Register assignment for the unoptimized path: add the allocator registered
/// for the "sgpr" filter, then the one for the "vgpr" filter. The first
/// failure is returned to the caller.
Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
    AddMachinePass &addPass) const {
  Error SGPRErr = addRegAllocPass(addPass, "sgpr");
  if (SGPRErr)
    return SGPRErr;
  // TODO: Add other passes.
  return addRegAllocPass(addPass, "vgpr");
}

/// Register assignment for the optimized path. Currently a stub: no pass is
/// added to \p addPass and success is returned unconditionally.
Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
AddMachinePass &addPass) const {
// TODO: Add greedy register allocator.
return Error::success();
}
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ class AMDGPUCodeGenPassBuilder
: public CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, AMDGPUTargetMachine> {
public:
AMDGPUCodeGenPassBuilder(AMDGPUTargetMachine &TM,
const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC);
const CGPassBuilderOption &Opts, PassBuilder &PB);

void addPreISel(AddIRPass &addPass) const;
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
Error addRegAssignmentFast(AddMachinePass &addPass) const;
Error addRegAssignmentOptimized(AddMachinePass &) const;
};

} // namespace llvm
Expand Down
Loading
Loading