Skip to content

Commit 3acf7e7

Browse files
committed
[TailDuplicator] Do not restrict the computed gotos
1 parent ba44cbf commit 3acf7e7

File tree

3 files changed

+45
-28
lines changed

3 files changed

+45
-28
lines changed

llvm/include/llvm/CodeGen/MachineInstr.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -994,8 +994,17 @@ class MachineInstr
994994

995995
/// Return true if this is an indirect branch, such as a
996996
/// branch through a register.
997-
bool isIndirectBranch(QueryType Type = AnyInBundle) const {
998-
return hasProperty(MCID::IndirectBranch, Type);
997+
bool isIndirectBranch(QueryType Type = AnyInBundle,
998+
bool IncludeJumpTable = true) const {
999+
return hasProperty(MCID::IndirectBranch, Type) &&
1000+
(IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) {
1001+
return Op.isJTI();
1002+
}));
1003+
}
1004+
1005+
bool isComputedGoto(QueryType Type = AnyInBundle) const {
1006+
// Jump tables are not considered computed gotos.
1007+
return isIndirectBranch(Type, /*IncludeJumpTable=*/false);
9991008
}
10001009

10011010
/// Return true if this is a branch which may fall

llvm/lib/CodeGen/TailDuplicator.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -601,8 +601,11 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
601601
// that rearrange the predecessors of the indirect branch.
602602

603603
bool HasIndirectbr = false;
604-
if (!TailBB.empty())
604+
bool HasComputedGoto = false;
605+
if (!TailBB.empty()) {
605606
HasIndirectbr = TailBB.back().isIndirectBranch();
607+
HasComputedGoto = TailBB.back().isComputedGoto();
608+
}
606609

607610
if (HasIndirectbr && PreRegAlloc)
608611
MaxDuplicateCount = TailDupIndirectBranchSize;
@@ -660,7 +663,12 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
660663
// Duplicating a BB which has both multiple predecessors and successors
661664
// may cause a huge number of PHI nodes. If we want to remove this limitation,
662665
// we have to address https://github.com/llvm/llvm-project/issues/78578.
663-
if (TailBB.pred_size() > TailDupPredSize &&
666+
// NB. This basically unfactors computed gotos that were factored early on in
667+
// the compilation process to speed up edge-based data flow. If we do not
668+
// unfactor them again, it can seriously pessimize code with many computed
669+
// jumps in the source code, such as interpreters. Therefore we do not
670+
// restrict the computed gotos.
671+
if (!HasComputedGoto && TailBB.pred_size() > TailDupPredSize &&
664672
TailBB.succ_size() > TailDupSuccSize) {
665673
// If TailBB or any of its successors contains a phi, we may have to add a
666674
// large number of additional phis with additional incoming values.

llvm/test/CodeGen/X86/tail-dup-computed-goto.mir

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,59 +18,59 @@ tracksRegLiveness: true
1818
body: |
1919
; CHECK-LABEL: name: computed_goto
2020
; CHECK: bb.0:
21-
; CHECK-NEXT: successors: %bb.5(0x80000000)
21+
; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
2222
; CHECK-NEXT: {{ $}}
2323
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
2424
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
2525
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
2626
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax
27-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY [[COPY]]
28-
; CHECK-NEXT: JMP_1 %bb.5
27+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY]]
28+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
29+
; CHECK-NEXT: JMP64m $noreg, 8, [[COPY1]], @computed_goto.dispatch, $noreg
2930
; CHECK-NEXT: {{ $}}
3031
; CHECK-NEXT: bb.1:
31-
; CHECK-NEXT: successors: %bb.5(0x80000000)
32+
; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
3233
; CHECK-NEXT: {{ $}}
3334
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
3435
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
3536
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
36-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rax
37-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY2]]
38-
; CHECK-NEXT: JMP_1 %bb.5
37+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax
38+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64_nosp = COPY [[COPY3]]
39+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64_nosp = COPY [[COPY4]]
40+
; CHECK-NEXT: JMP64m $noreg, 8, [[COPY4]], @computed_goto.dispatch, $noreg
3941
; CHECK-NEXT: {{ $}}
4042
; CHECK-NEXT: bb.2:
41-
; CHECK-NEXT: successors: %bb.5(0x80000000)
43+
; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
4244
; CHECK-NEXT: {{ $}}
4345
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
4446
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
4547
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
46-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rax
47-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY [[COPY4]]
48-
; CHECK-NEXT: JMP_1 %bb.5
48+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
49+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64_nosp = COPY [[COPY6]]
50+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64_nosp = COPY [[COPY7]]
51+
; CHECK-NEXT: JMP64m $noreg, 8, [[COPY7]], @computed_goto.dispatch, $noreg
4952
; CHECK-NEXT: {{ $}}
5053
; CHECK-NEXT: bb.3:
51-
; CHECK-NEXT: successors: %bb.5(0x80000000)
54+
; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
5255
; CHECK-NEXT: {{ $}}
5356
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
5457
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
5558
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
56-
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
57-
; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64 = COPY [[COPY6]]
58-
; CHECK-NEXT: JMP_1 %bb.5
59+
; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY $rax
60+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:gr64_nosp = COPY [[COPY9]]
61+
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gr64_nosp = COPY [[COPY10]]
62+
; CHECK-NEXT: JMP64m $noreg, 8, [[COPY10]], @computed_goto.dispatch, $noreg
5963
; CHECK-NEXT: {{ $}}
6064
; CHECK-NEXT: bb.4:
61-
; CHECK-NEXT: successors: %bb.5(0x80000000)
65+
; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
6266
; CHECK-NEXT: {{ $}}
6367
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
6468
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
6569
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
66-
; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY $rax
67-
; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY [[COPY8]]
68-
; CHECK-NEXT: {{ $}}
69-
; CHECK-NEXT: bb.5:
70-
; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
71-
; CHECK-NEXT: {{ $}}
72-
; CHECK-NEXT: [[PHI:%[0-9]+]]:gr64_nosp = PHI [[COPY1]], %bb.0, [[COPY9]], %bb.4, [[COPY7]], %bb.3, [[COPY5]], %bb.2, [[COPY3]], %bb.1
73-
; CHECK-NEXT: JMP64m $noreg, 8, [[PHI]], @computed_goto.dispatch, $noreg
70+
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gr64 = COPY $rax
71+
; CHECK-NEXT: [[COPY13:%[0-9]+]]:gr64_nosp = COPY [[COPY12]]
72+
; CHECK-NEXT: [[COPY14:%[0-9]+]]:gr64_nosp = COPY [[COPY13]]
73+
; CHECK-NEXT: JMP64m $noreg, 8, [[COPY13]], @computed_goto.dispatch, $noreg
7474
bb.0:
7575
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
7676
CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax

0 commit comments

Comments
 (0)