Skip to content

Commit 76ea5fe

Browse files
authored
[AArch64] Combine concat(binop, binop) into binop(concat, concat) (#89911)
This generalizes the existing combine for concat(radd, radd) to any binops. For much the same reason as the existing code, pushing the concat up through the tree is hopefully quicker than (or the same as) the existing two half-vector operations, and can help combine away the concat.
1 parent 03b1a0c commit 76ea5fe

File tree

3 files changed

+63
-44
lines changed

3 files changed

+63
-44
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+5-7
Original file line numberDiff line numberDiff line change
@@ -18663,14 +18663,12 @@ static SDValue performConcatVectorsCombine(SDNode *N,
1866318663
if (DCI.isBeforeLegalizeOps())
1866418664
return SDValue();
1866518665

18666-
// Optimise concat_vectors of two [us]avgceils or [us]avgfloors with a 128-bit
18667-
// destination size, combine into an avg of two contacts of the source
18668-
// vectors. eg: concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c),
18669-
// concat(b, d))
18666+
// Optimise concat_vectors of two identical binops with a 128-bit destination
18667+
// size, combine into a binop of two concats of the source vectors. eg:
18668+
// concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), concat(b, d))
1867018669
if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
18671-
(N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS ||
18672-
N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS) &&
18673-
N0->hasOneUse() && N1->hasOneUse()) {
18670+
DAG.getTargetLoweringInfo().isBinOp(N0Opc) && N0->hasOneUse() &&
18671+
N1->hasOneUse()) {
1867418672
SDValue N00 = N0->getOperand(0);
1867518673
SDValue N01 = N0->getOperand(1);
1867618674
SDValue N10 = N1->getOperand(0);

llvm/test/CodeGen/AArch64/concatbinop.ll

+50-31
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,13 @@
55
define <8 x i16> @concat_add(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
66
; CHECK-LABEL: concat_add:
77
; CHECK: // %bb.0:
8-
; CHECK-NEXT: add v2.4h, v2.4h, v3.4h
9-
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
8+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
9+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
10+
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
11+
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
12+
; CHECK-NEXT: mov v1.d[1], v3.d[0]
1013
; CHECK-NEXT: mov v0.d[1], v2.d[0]
14+
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
1115
; CHECK-NEXT: ret
1216
%x = add <4 x i16> %a, %b
1317
%y = add <4 x i16> %c, %d
@@ -33,13 +37,9 @@ define <8 x i16> @concat_addtunc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
3337
define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
3438
; CHECK-LABEL: concat_addtunc2:
3539
; CHECK: // %bb.0:
36-
; CHECK-NEXT: xtn v1.4h, v1.4s
37-
; CHECK-NEXT: xtn v0.4h, v0.4s
38-
; CHECK-NEXT: xtn v2.4h, v2.4s
39-
; CHECK-NEXT: xtn v3.4h, v3.4s
40-
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
41-
; CHECK-NEXT: add v1.4h, v2.4h, v3.4h
42-
; CHECK-NEXT: mov v0.d[1], v1.d[0]
40+
; CHECK-NEXT: uzp1 v1.8h, v1.8h, v3.8h
41+
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
42+
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
4343
; CHECK-NEXT: ret
4444
%at = trunc <4 x i32> %a to <4 x i16>
4545
%bt = trunc <4 x i32> %b to <4 x i16>
@@ -54,9 +54,13 @@ define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
5454
define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
5555
; CHECK-LABEL: concat_sub:
5656
; CHECK: // %bb.0:
57-
; CHECK-NEXT: sub v2.4h, v2.4h, v3.4h
58-
; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
57+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
58+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
59+
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
60+
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
61+
; CHECK-NEXT: mov v1.d[1], v3.d[0]
5962
; CHECK-NEXT: mov v0.d[1], v2.d[0]
63+
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
6064
; CHECK-NEXT: ret
6165
%x = sub <4 x i16> %a, %b
6266
%y = sub <4 x i16> %c, %d
@@ -67,9 +71,13 @@ define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
6771
define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
6872
; CHECK-LABEL: concat_mul:
6973
; CHECK: // %bb.0:
70-
; CHECK-NEXT: mul v2.4h, v2.4h, v3.4h
71-
; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h
74+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
75+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
76+
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
77+
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
78+
; CHECK-NEXT: mov v1.d[1], v3.d[0]
7279
; CHECK-NEXT: mov v0.d[1], v2.d[0]
80+
; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
7381
; CHECK-NEXT: ret
7482
%x = mul <4 x i16> %a, %b
7583
%y = mul <4 x i16> %c, %d
@@ -80,9 +88,13 @@ define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
8088
define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
8189
; CHECK-LABEL: concat_xor:
8290
; CHECK: // %bb.0:
83-
; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
84-
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
91+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
92+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
93+
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
94+
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
95+
; CHECK-NEXT: mov v1.d[1], v3.d[0]
8596
; CHECK-NEXT: mov v0.d[1], v2.d[0]
97+
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
8698
; CHECK-NEXT: ret
8799
%x = xor <4 x i16> %a, %b
88100
%y = xor <4 x i16> %c, %d
@@ -93,9 +105,13 @@ define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
93105
define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
94106
; CHECK-LABEL: concat_fadd:
95107
; CHECK: // %bb.0:
96-
; CHECK-NEXT: fadd v2.4h, v2.4h, v3.4h
97-
; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
108+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
109+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
110+
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
111+
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
112+
; CHECK-NEXT: mov v1.d[1], v3.d[0]
98113
; CHECK-NEXT: mov v0.d[1], v2.d[0]
114+
; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
99115
; CHECK-NEXT: ret
100116
%x = fadd <4 x half> %a, %b
101117
%y = fadd <4 x half> %c, %d
@@ -106,9 +122,13 @@ define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x
106122
define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
107123
; CHECK-LABEL: concat_fmul:
108124
; CHECK: // %bb.0:
109-
; CHECK-NEXT: fmul v2.4h, v2.4h, v3.4h
110-
; CHECK-NEXT: fmul v0.4h, v0.4h, v1.4h
125+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
126+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
127+
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
128+
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
129+
; CHECK-NEXT: mov v1.d[1], v3.d[0]
111130
; CHECK-NEXT: mov v0.d[1], v2.d[0]
131+
; CHECK-NEXT: fmul v0.8h, v0.8h, v1.8h
112132
; CHECK-NEXT: ret
113133
%x = fmul <4 x half> %a, %b
114134
%y = fmul <4 x half> %c, %d
@@ -119,9 +139,13 @@ define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x
119139
define <8 x half> @concat_min(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
120140
; CHECK-LABEL: concat_min:
121141
; CHECK: // %bb.0:
122-
; CHECK-NEXT: fminnm v2.4h, v2.4h, v3.4h
123-
; CHECK-NEXT: fminnm v0.4h, v0.4h, v1.4h
142+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
143+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
144+
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
145+
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
146+
; CHECK-NEXT: mov v1.d[1], v3.d[0]
124147
; CHECK-NEXT: mov v0.d[1], v2.d[0]
148+
; CHECK-NEXT: fminnm v0.8h, v0.8h, v1.8h
125149
; CHECK-NEXT: ret
126150
%x = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
127151
%y = call <4 x half> @llvm.minnum.v4f16(<4 x half> %c, <4 x half> %d)
@@ -146,21 +170,16 @@ define <16 x i8> @signOf_neon(ptr nocapture noundef readonly %a, ptr nocapture n
146170
; CHECK-LABEL: signOf_neon:
147171
; CHECK: // %bb.0: // %entry
148172
; CHECK-NEXT: ldp q1, q2, [x0]
149-
; CHECK-NEXT: movi v0.8b, #1
173+
; CHECK-NEXT: movi v0.16b, #1
150174
; CHECK-NEXT: ldp q3, q4, [x1]
151175
; CHECK-NEXT: cmhi v5.8h, v1.8h, v3.8h
152176
; CHECK-NEXT: cmhi v6.8h, v2.8h, v4.8h
153177
; CHECK-NEXT: cmhi v1.8h, v3.8h, v1.8h
154178
; CHECK-NEXT: cmhi v2.8h, v4.8h, v2.8h
155-
; CHECK-NEXT: xtn v3.8b, v5.8h
156-
; CHECK-NEXT: xtn v4.8b, v6.8h
157-
; CHECK-NEXT: xtn v1.8b, v1.8h
158-
; CHECK-NEXT: xtn v2.8b, v2.8h
159-
; CHECK-NEXT: and v3.8b, v3.8b, v0.8b
160-
; CHECK-NEXT: and v4.8b, v4.8b, v0.8b
161-
; CHECK-NEXT: orr v0.8b, v3.8b, v1.8b
162-
; CHECK-NEXT: orr v1.8b, v4.8b, v2.8b
163-
; CHECK-NEXT: mov v0.d[1], v1.d[0]
179+
; CHECK-NEXT: uzp1 v3.16b, v5.16b, v6.16b
180+
; CHECK-NEXT: uzp1 v1.16b, v1.16b, v2.16b
181+
; CHECK-NEXT: and v0.16b, v3.16b, v0.16b
182+
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
164183
; CHECK-NEXT: ret
165184
entry:
166185
%0 = load <8 x i16>, ptr %a, align 2

llvm/test/CodeGen/AArch64/vecreduce-add.ll

+8-6
Original file line numberDiff line numberDiff line change
@@ -2825,10 +2825,11 @@ entry:
28252825
define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
28262826
; CHECK-SD-LABEL: add_pair_v2i16_v2i64_zext:
28272827
; CHECK-SD: // %bb.0: // %entry
2828-
; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
2829-
; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b
2830-
; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
2828+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
2829+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
2830+
; CHECK-SD-NEXT: movi v2.2d, #0x00ffff0000ffff
28312831
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
2832+
; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
28322833
; CHECK-SD-NEXT: uaddlv d0, v0.4s
28332834
; CHECK-SD-NEXT: fmov x0, d0
28342835
; CHECK-SD-NEXT: ret
@@ -3578,10 +3579,11 @@ entry:
35783579
define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
35793580
; CHECK-SD-LABEL: add_pair_v2i8_v2i64_zext:
35803581
; CHECK-SD: // %bb.0: // %entry
3581-
; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff
3582-
; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b
3583-
; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
3582+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
3583+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
3584+
; CHECK-SD-NEXT: movi v2.2d, #0x0000ff000000ff
35843585
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
3586+
; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
35853587
; CHECK-SD-NEXT: uaddlv d0, v0.4s
35863588
; CHECK-SD-NEXT: fmov x0, d0
35873589
; CHECK-SD-NEXT: ret

0 commit comments

Comments
 (0)