Skip to content

Commit caca20a

Browse files
author
Manish Kausik H
committed
[SelectionDAG] Use unaligned store/load to move AVX registers onto stack for insertelement
Prior to this patch, SelectionDAG generated aligned moves onto the stack for AVX registers even when the function was marked as a no-realign-stack function. This led to a misalignment between the stack and the generated instruction. This patch fixes the issue. There was a similar issue reported for `extractelement`, which was fixed in #a6614ec5b7c1dbfc4b847884c5de780cf75e8e9c
1 parent f091848 commit caca20a

File tree

2 files changed

+33
-12
lines changed

2 files changed

+33
-12
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

+15-12
Original file line numberDiff line numberDiff line change
@@ -1469,12 +1469,11 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
14691469
EVT VecVT = Vec.getValueType();
14701470
EVT PartVT = Part.getValueType();
14711471
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
1472-
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
1473-
MachinePointerInfo PtrInfo =
1474-
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
1472+
MachineMemOperand *AlignedMMO = getStackAlignedMMO(
1473+
StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector());
14751474

14761475
// First store the whole vector.
1477-
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
1476+
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, AlignedMMO);
14781477

14791478
// Freeze the index so we don't poison the clamping code we're about to emit.
14801479
Idx = DAG.getFreeze(Idx);
@@ -1485,22 +1484,26 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
14851484
TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, PartVT, Idx);
14861485

14871486
// Store the subvector.
1488-
Ch = DAG.getStore(
1489-
Ch, dl, Part, SubStackPtr,
1490-
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
1487+
Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, AlignedMMO);
14911488
} else {
14921489
SDValue SubStackPtr =
14931490
TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
14941491

14951492
// Store the scalar value.
1496-
Ch = DAG.getTruncStore(
1497-
Ch, dl, Part, SubStackPtr,
1498-
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
1499-
VecVT.getVectorElementType());
1493+
Ch = DAG.getTruncStore(Ch, dl, Part, SubStackPtr,
1494+
VecVT.getVectorElementType(), AlignedMMO);
15001495
}
15011496

1497+
Align ElementAlignment =
1498+
std::min(cast<StoreSDNode>(Ch)->getAlign(),
1499+
DAG.getSubtarget().getFrameLowering()->getStackAlign());
1500+
15021501
// Finally, load the updated vector.
1503-
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
1502+
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
1503+
MachinePointerInfo PtrInfo =
1504+
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
1505+
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
1506+
ElementAlignment);
15041507
}
15051508

15061509
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3+
4+
define <8 x i32> @foo(<8 x i32> %arg1, i32 %n) #0 {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
9+
; CHECK-NEXT: andl $7, %edi
10+
; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
11+
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
12+
; CHECK-NEXT: retq
13+
entry:
14+
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
15+
ret <8 x i32> %a
16+
}
17+
18+
attributes #0 = { "no-realign-stack" "target-cpu"="haswell" }

0 commit comments

Comments
 (0)