Skip to content

Commit c867389

Browse files
author
Manish Kausik H
committed
Consider if the stack is realignable during DAG legalization of insertelement
Prior to this patch, SelectionDAG generated aligned moves to the stack for AVX registers even when the function was marked as a no-realign-stack function. This could lead to a mismatch between the actual alignment of the stack slot and the alignment assumed by the generated instruction. This patch fixes the issue. A similar issue was reported for `extractelement` and was fixed in commit a6614ec5b7c1dbfc4b847884c5de780cf75e8e9c.
1 parent 5bd3aef commit c867389

File tree

2 files changed

+137
-4
lines changed

2 files changed

+137
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

+15-4
Original file line numberDiff line numberDiff line change
@@ -1474,11 +1474,17 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
14741474
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
14751475

14761476
// First store the whole vector.
1477-
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
1477+
Align BaseVecAlignment =
1478+
DAG.getMachineFunction().getFrameInfo().getObjectAlign(FI);
1479+
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
1480+
BaseVecAlignment);
14781481

14791482
// Freeze the index so we don't poison the clamping code we're about to emit.
14801483
Idx = DAG.getFreeze(Idx);
14811484

1485+
Type *PartTy = PartVT.getTypeForEVT(*DAG.getContext());
1486+
Align PartAlignment = DAG.getDataLayout().getPrefTypeAlign(PartTy);
1487+
14821488
// Then store the inserted part.
14831489
if (PartVT.isVector()) {
14841490
SDValue SubStackPtr =
@@ -1487,7 +1493,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
14871493
// Store the subvector.
14881494
Ch = DAG.getStore(
14891495
Ch, dl, Part, SubStackPtr,
1490-
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
1496+
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
1497+
PartAlignment);
14911498
} else {
14921499
SDValue SubStackPtr =
14931500
TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
@@ -1496,11 +1503,15 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
14961503
Ch = DAG.getTruncStore(
14971504
Ch, dl, Part, SubStackPtr,
14981505
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
1499-
VecVT.getVectorElementType());
1506+
VecVT.getVectorElementType(), PartAlignment);
15001507
}
15011508

1509+
assert(cast<StoreSDNode>(Ch)->getAlign() == PartAlignment &&
1510+
"ElementAlignment does not match!");
1511+
15021512
// Finally, load the updated vector.
1503-
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
1513+
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
1514+
BaseVecAlignment);
15041515
}
15051516

15061517
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3+
4+
define <8 x i32> @foo(<8 x i32> %arg1, i32 %n) #0 {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
9+
; CHECK-NEXT: andl $7, %edi
10+
; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
11+
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
12+
; CHECK-NEXT: retq
13+
entry:
14+
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
15+
ret <8 x i32> %a
16+
}
17+
18+
define <8 x i32> @foo2(<8 x i32> %arg1, i32 %n) alignstack(8) #0 {
19+
; CHECK-LABEL: foo2:
20+
; CHECK: # %bb.0: # %entry
21+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
22+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
23+
; CHECK-NEXT: andl $7, %edi
24+
; CHECK-NEXT: movl $42, -32(%rsp,%rdi,4)
25+
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
26+
; CHECK-NEXT: retq
27+
entry:
28+
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
29+
ret <8 x i32> %a
30+
}
31+
32+
define <8 x i32> @foo3(<8 x i32> %arg1, i32 %n) alignstack(16) #0 {
33+
; CHECK-LABEL: foo3:
34+
; CHECK: # %bb.0: # %entry
35+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
36+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
37+
; CHECK-NEXT: andl $7, %edi
38+
; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
39+
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
40+
; CHECK-NEXT: retq
41+
entry:
42+
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
43+
ret <8 x i32> %a
44+
}
45+
46+
define <8 x i32> @foo4(<8 x i32> %arg1, i32 %n) alignstack(64) #0 {
47+
; CHECK-LABEL: foo4:
48+
; CHECK: # %bb.0: # %entry
49+
; CHECK-NEXT: vmovaps %ymm0, -{{[0-9]+}}(%rsp)
50+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
51+
; CHECK-NEXT: andl $7, %edi
52+
; CHECK-NEXT: movl $42, -56(%rsp,%rdi,4)
53+
; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %ymm0
54+
; CHECK-NEXT: retq
55+
entry:
56+
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
57+
ret <8 x i32> %a
58+
}
59+
60+
define <8 x i32> @foo5(<8 x i32> %arg1, i32 %n) alignstack(256) #0 {
61+
; CHECK-LABEL: foo5:
62+
; CHECK: # %bb.0: # %entry
63+
; CHECK-NEXT: subq $120, %rsp
64+
; CHECK-NEXT: .cfi_def_cfa_offset 128
65+
; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
66+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
67+
; CHECK-NEXT: andl $7, %edi
68+
; CHECK-NEXT: movl $42, 64(%rsp,%rdi,4)
69+
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm0
70+
; CHECK-NEXT: addq $120, %rsp
71+
; CHECK-NEXT: .cfi_def_cfa_offset 8
72+
; CHECK-NEXT: retq
73+
entry:
74+
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
75+
ret <8 x i32> %a
76+
}
77+
78+
define <8 x i16> @foo6(<8 x i16> %arg1, i32 %n) #0 {
79+
; CHECK-LABEL: foo6:
80+
; CHECK: # %bb.0: # %entry
81+
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
82+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
83+
; CHECK-NEXT: andl $7, %edi
84+
; CHECK-NEXT: movw $42, -24(%rsp,%rdi,2)
85+
; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
86+
; CHECK-NEXT: retq
87+
entry:
88+
%a = insertelement <8 x i16> %arg1, i16 42, i32 %n
89+
ret <8 x i16> %a
90+
}
91+
92+
define <8 x i8> @foo7(<8 x i8> %arg1, i32 %n) #0 {
93+
; CHECK-LABEL: foo7:
94+
; CHECK: # %bb.0: # %entry
95+
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
96+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
97+
; CHECK-NEXT: andl $15, %edi
98+
; CHECK-NEXT: movb $42, -24(%rsp,%rdi)
99+
; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
100+
; CHECK-NEXT: retq
101+
entry:
102+
%a = insertelement <8 x i8> %arg1, i8 42, i32 %n
103+
ret <8 x i8> %a
104+
}
105+
106+
define <8 x i64> @foo8(<8 x i64> %arg1, i32 %n) #0 {
107+
; CHECK-LABEL: foo8:
108+
; CHECK: # %bb.0: # %entry
109+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
110+
; CHECK-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
111+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
112+
; CHECK-NEXT: andl $7, %edi
113+
; CHECK-NEXT: movq $42, -72(%rsp,%rdi,8)
114+
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
115+
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm1
116+
; CHECK-NEXT: retq
117+
entry:
118+
%a = insertelement <8 x i64> %arg1, i64 42, i32 %n
119+
ret <8 x i64> %a
120+
}
121+
122+
attributes #0 = { "no-realign-stack" "target-cpu"="haswell" }

0 commit comments

Comments
 (0)