diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bdb7917073020..f2d40d28b3100 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1474,11 +1474,19 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
 
   // First store the whole vector.
-  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+  // Store (and later reload) the vector with its stack slot's alignment.
+  Align BaseVecAlignment =
+      DAG.getMachineFunction().getFrameInfo().getObjectAlign(FI);
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+                            BaseVecAlignment);
 
   // Freeze the index so we don't poison the clamping code we're about to emit.
   Idx = DAG.getFreeze(Idx);
 
+  // The inserted part is stored with the preferred alignment of its type.
+  Type *PartTy = PartVT.getTypeForEVT(*DAG.getContext());
+  Align PartAlignment = DAG.getDataLayout().getPrefTypeAlign(PartTy);
+
   // Then store the inserted part.
   if (PartVT.isVector()) {
     SDValue SubStackPtr =
@@ -1487,7 +1495,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
     // Store the subvector.
     Ch = DAG.getStore(
         Ch, dl, Part, SubStackPtr,
-        MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+        MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
+        PartAlignment);
   } else {
     SDValue SubStackPtr =
         TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
@@ -1496,11 +1505,16 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
     Ch = DAG.getTruncStore(
         Ch, dl, Part, SubStackPtr,
         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
-        VecVT.getVectorElementType());
+        VecVT.getVectorElementType(), PartAlignment);
   }
 
+  // Whichever branch ran, the resulting store must carry PartAlignment.
+  assert(cast<StoreSDNode>(Ch)->getAlign() == PartAlignment &&
+         "ElementAlignment does not match!");
+
   // Finally, load the updated vector.
-  return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
+  return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+                     BaseVecAlignment);
 }
 
 SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
diff --git a/llvm/test/CodeGen/X86/insert-into-vector-through-stack-no-stack-realign.ll b/llvm/test/CodeGen/X86/insert-into-vector-through-stack-no-stack-realign.ll
new file mode 100644
index 0000000000000..8006263762152
--- /dev/null
+++ b/llvm/test/CodeGen/X86/insert-into-vector-through-stack-no-stack-realign.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s
+
+define <8 x i32> @foo(<8 x i32> %arg1, i32 %n) #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $7, %edi
+; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
+; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
+; CHECK-NEXT: retq
+entry:
+  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
+  ret <8 x i32> %a
+}
+
+define <8 x i32> @foo2(<8 x i32> %arg1, i32 %n) alignstack(8) #0 {
+; CHECK-LABEL: foo2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $7, %edi
+; CHECK-NEXT: movl $42, -32(%rsp,%rdi,4)
+; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
+; CHECK-NEXT: retq
+entry:
+  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
+  ret <8 x i32> %a
+}
+
+define <8 x i32> @foo3(<8 x i32> %arg1, i32 %n) alignstack(16) #0 {
+; CHECK-LABEL: foo3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $7, %edi
+; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
+; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
+; CHECK-NEXT: retq
+entry:
+  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
+  ret <8 x i32> %a
+}
+
+define <8 x i32> @foo4(<8 x i32> %arg1, i32 %n) alignstack(64) #0 {
+; CHECK-LABEL: foo4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: vmovaps %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $7, %edi
+; CHECK-NEXT: movl $42, -56(%rsp,%rdi,4)
+; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %ymm0
+; CHECK-NEXT: retq
+entry:
+  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
+  ret <8 x i32> %a
+}
+
+define <8 x i32> @foo5(<8 x i32> %arg1, i32 %n) alignstack(256) #0 {
+; CHECK-LABEL: foo5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $120, %rsp
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $7, %edi
+; CHECK-NEXT: movl $42, 64(%rsp,%rdi,4)
+; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm0
+; CHECK-NEXT: addq $120, %rsp
+; CHECK-NEXT: retq
+entry:
+  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
+  ret <8 x i32> %a
+}
+
+define <8 x i16> @foo6(<8 x i16> %arg1, i32 %n) #0 {
+; CHECK-LABEL: foo6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $7, %edi
+; CHECK-NEXT: movw $42, -24(%rsp,%rdi,2)
+; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT: retq
+entry:
+  %a = insertelement <8 x i16> %arg1, i16 42, i32 %n
+  ret <8 x i16> %a
+}
+
+define <8 x i8> @foo7(<8 x i8> %arg1, i32 %n) #0 {
+; CHECK-LABEL: foo7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $15, %edi
+; CHECK-NEXT: movb $42, -24(%rsp,%rdi)
+; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT: retq
+entry:
+  %a = insertelement <8 x i8> %arg1, i8 42, i32 %n
+  ret <8 x i8> %a
+}
+
+define <8 x i64> @foo8(<8 x i64> %arg1, i32 %n) #0 {
+; CHECK-LABEL: foo8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $7, %edi
+; CHECK-NEXT: movq $42, -72(%rsp,%rdi,8)
+; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
+; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm1
+; CHECK-NEXT: retq
+entry:
+  %a = insertelement <8 x i64> %arg1, i64 42, i32 %n
+  ret <8 x i64> %a
+}
+
+attributes #0 = { "no-realign-stack" nounwind }