Skip to content

Commit a410db3

Browse files
committed
AMDGPU/NewPM: Fill out addPreISelPasses
This specific callback should now be at parity with the old pass manager version. There are still some missing IR passes before this point. Also, I don't understand the need for the RequireAnalysisPass at the end. SelectionDAG should just be using the uncached getResult?
1 parent 306343c commit a410db3

File tree

4 files changed

+60
-6
lines changed

4 files changed

+60
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp

+53-2
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,17 @@
99
#include "AMDGPUCodeGenPassBuilder.h"
1010
#include "AMDGPU.h"
1111
#include "AMDGPUISelDAGToDAG.h"
12+
#include "AMDGPUPerfHintAnalysis.h"
1213
#include "AMDGPUTargetMachine.h"
14+
#include "AMDGPUUnifyDivergentExitNodes.h"
1315
#include "SIFixSGPRCopies.h"
1416
#include "llvm/Analysis/UniformityAnalysis.h"
17+
#include "llvm/Transforms/Scalar/FlattenCFG.h"
18+
#include "llvm/Transforms/Scalar/Sink.h"
19+
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
20+
#include "llvm/Transforms/Utils/FixIrreducible.h"
21+
#include "llvm/Transforms/Utils/LCSSA.h"
22+
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
1523

1624
using namespace llvm;
1725

@@ -28,8 +36,51 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
2836
}
2937

3038
void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
31-
// TODO: Add passes pre instruction selection.
32-
// Test only, convert to real IR passes in future.
39+
const bool LateCFGStructurize = AMDGPUTargetMachine::EnableLateStructurizeCFG;
40+
const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer;
41+
const bool EnableStructurizerWorkarounds =
42+
AMDGPUTargetMachine::EnableStructurizerWorkarounds;
43+
44+
if (TM.getOptLevel() > CodeGenOptLevel::None)
45+
addPass(FlattenCFGPass());
46+
47+
if (TM.getOptLevel() > CodeGenOptLevel::None)
48+
addPass(SinkingPass());
49+
50+
addPass(AMDGPULateCodeGenPreparePass(TM));
51+
52+
// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
53+
// regions formed by them.
54+
55+
addPass(AMDGPUUnifyDivergentExitNodesPass());
56+
57+
if (!LateCFGStructurize && !DisableStructurizer) {
58+
if (EnableStructurizerWorkarounds) {
59+
addPass(FixIrreduciblePass());
60+
addPass(UnifyLoopExitsPass());
61+
}
62+
63+
addPass(StructurizeCFGPass(/*SkipUniformRegions=*/false));
64+
}
65+
66+
addPass(AMDGPUAnnotateUniformValuesPass());
67+
68+
if (!LateCFGStructurize && !DisableStructurizer) {
69+
addPass(SIAnnotateControlFlowPass(TM));
70+
71+
// TODO: Move this right after structurizeCFG to avoid extra divergence
72+
// analysis. This depends on stopping SIAnnotateControlFlow from making
73+
// control flow modifications.
74+
addPass(AMDGPURewriteUndefForPHIPass());
75+
}
76+
77+
addPass(LCSSAPass());
78+
79+
if (TM.getOptLevel() > CodeGenOptLevel::Less)
80+
addPass(AMDGPUPerfHintAnalysisPass(TM));
81+
82+
// FIXME: Why isn't this queried as required from AMDGPUISelDAGToDAG, and why
83+
// isn't this in addInstSelector?
3384
addPass(RequireAnalysisPass<UniformityInfoAnalysis, Function>());
3485
}
3586

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -338,10 +338,11 @@ static cl::opt<bool> EnableScalarIRPasses(
338338
cl::init(true),
339339
cl::Hidden);
340340

341-
static cl::opt<bool> EnableStructurizerWorkarounds(
341+
static cl::opt<bool, true> EnableStructurizerWorkarounds(
342342
"amdgpu-enable-structurizer-workarounds",
343-
cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
344-
cl::Hidden);
343+
cl::desc("Enable workarounds for the StructurizeCFG pass"),
344+
cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds),
345+
cl::init(true), cl::Hidden);
345346

346347
static cl::opt<bool, true> EnableLowerModuleLDS(
347348
"amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
@@ -611,6 +612,7 @@ bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
611612
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
612613
bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
613614
bool AMDGPUTargetMachine::DisableStructurizer = false;
615+
bool AMDGPUTargetMachine::EnableStructurizerWorkarounds = true;
614616

615617
AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
616618

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
3838
static bool EnableFunctionCalls;
3939
static bool EnableLowerModuleLDS;
4040
static bool DisableStructurizer;
41+
static bool EnableStructurizerWorkarounds;
4142

4243
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
4344
StringRef FS, const TargetOptions &Options,

llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-isel -stop-after=amdgpu-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
2-
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel -enable-new-pm | FileCheck %s --check-prefixes=CHECK
2+
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-isel -stop-after=amdgpu-isel -enable-new-pm -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
33

44
; This caused failure in infinite cycle in Selection DAG (combine) due to missing insert_subvector.
55
;

0 commit comments

Comments
 (0)