[msan] Apply handleVectorReduceIntrinsic to max/min vector instructions #129819

Merged
merged 2 commits into from Mar 9, 2025
64 changes: 46 additions & 18 deletions llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3653,14 +3653,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Instrument generic vector reduction intrinsics
// by ORing together all their fields.
//
// The return type does not need to be the same type as the fields
// If AllowShadowCast is true, the return type does not need to be the same
// type as the fields
// e.g., declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
void handleVectorReduceIntrinsic(IntrinsicInst &I) {
void handleVectorReduceIntrinsic(IntrinsicInst &I, bool AllowShadowCast) {
assert(I.arg_size() == 1);

IRBuilder<> IRB(&I);
Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
S = CreateShadowCast(IRB, S, getShadowTy(&I));
if (AllowShadowCast)
S = CreateShadowCast(IRB, S, getShadowTy(&I));
else
assert(S->getType() == getShadowTy(&I));
setShadow(&I, S);
setOriginForNaryOp(I);
}
@@ -3669,13 +3673,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float>
// %a1)
// shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
//
// The type of the return value, initial starting value, and elements of the
// vector must be identical.
void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
assert(I.arg_size() == 2);

IRBuilder<> IRB(&I);
Value *Shadow0 = getShadow(&I, 0);
Value *Shadow1 = IRB.CreateOrReduce(getShadow(&I, 1));
assert(Shadow0->getType() == Shadow1->getType());
Value *S = IRB.CreateOr(Shadow0, Shadow1);
assert(S->getType() == getShadowTy(&I));
setShadow(&I, S);
setOriginForNaryOp(I);
}
@@ -4458,21 +4467,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_mul:
// Add reduction to scalar
case Intrinsic::aarch64_neon_faddv:
case Intrinsic::aarch64_neon_saddv:
case Intrinsic::aarch64_neon_uaddv:
// Floating-point min/max (vector)
// The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
// but our shadow propagation is the same.
case Intrinsic::aarch64_neon_fmaxv:
case Intrinsic::aarch64_neon_fminv:
case Intrinsic::aarch64_neon_fmaxnmv:
case Intrinsic::aarch64_neon_fminnmv:
// Sum long across vector
case Intrinsic::aarch64_neon_saddlv:
case Intrinsic::aarch64_neon_uaddlv:
handleVectorReduceIntrinsic(I);
// Signed/Unsigned Min/Max
// TODO: handling similarly to AND/OR may be more precise.
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin:
// TODO: this has no false positives, but arguably we should check that all
// the bits are initialized.
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/false);
break;

case Intrinsic::vector_reduce_fadd:
@@ -4903,6 +4908,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}

// Add reduction to scalar
case Intrinsic::aarch64_neon_faddv:
case Intrinsic::aarch64_neon_saddv:
case Intrinsic::aarch64_neon_uaddv:
// Signed/Unsigned min/max (Vector)
// TODO: handling similarly to AND/OR may be more precise.
case Intrinsic::aarch64_neon_smaxv:
case Intrinsic::aarch64_neon_sminv:
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv:
// Floating-point min/max (vector)
// The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
// but our shadow propagation is the same.
case Intrinsic::aarch64_neon_fmaxv:
case Intrinsic::aarch64_neon_fminv:
case Intrinsic::aarch64_neon_fmaxnmv:
case Intrinsic::aarch64_neon_fminnmv:
// Sum long across vector
case Intrinsic::aarch64_neon_saddlv:
case Intrinsic::aarch64_neon_uaddlv:
handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
break;

// Saturating extract narrow
case Intrinsic::aarch64_neon_sqxtn:
case Intrinsic::aarch64_neon_sqxtun:
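
For orientation, the core of the change above is that the signed/unsigned min/max reductions (both the target-independent llvm.vector.reduce.{smax,smin,umax,umin} intrinsics and the AArch64 NEON {s,u}{max,min}v intrinsics) are now routed through handleVectorReduceIntrinsic, which ORs together the shadows of all vector lanes. A minimal sketch of the shape of instrumentation this implies for the target-independent case, with AllowShadowCast false; the value names and TLS declarations are illustrative rather than copied from the pass's actual output:

; Sketch only; assumes the usual MSan parameter/retval TLS slots.
@__msan_param_tls = external thread_local global [100 x i64]
@__msan_retval_tls = external thread_local global [100 x i64]

declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)

define i32 @smax_reduce(<4 x i32> %v) sanitize_memory {
entry:
  ; shadow of %v, loaded from the parameter TLS block
  %s_v = load <4 x i32>, ptr @__msan_param_tls, align 8
  ; result shadow: OR of all lane shadows; no cast is needed here because
  ; the scalar return type matches the vector element type
  %s_r = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %s_v)
  %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
  store i32 %s_r, ptr @__msan_retval_tls, align 8
  ret i32 %r
}

As the TODO comments in the diff note, this is conservative but has no false negatives; handling modeled on the AND/OR visitors might be more precise.
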
139 changes: 40 additions & 99 deletions llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll
@@ -2,9 +2,6 @@
; RUN: opt < %s -passes=msan -S | FileCheck %s
;
; Forked from llvm/test/CodeGen/AArch64/arm64-uminv.ll
;
; Handled suboptimally (visitInstruction):
; - llvm.aarch64.neon.smaxv

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-android9001"
@@ -15,16 +12,12 @@ define signext i8 @test_vmaxv_s8(<8 x i8> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i8 [[TMP4]]
;
entry:
@@ -39,16 +32,12 @@ define signext i16 @test_vmaxv_s16(<4 x i16> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
@@ -63,15 +52,9 @@ define i32 @test_vmaxv_s32(<2 x i32> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A1]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[VMAXV_I]]
;
entry:
@@ -85,16 +68,12 @@ define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i8 [[TMP4]]
;
entry:
@@ -109,16 +88,12 @@ define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
@@ -133,15 +108,9 @@ define i32 @test_vmaxvq_s32(<4 x i32> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]])
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A1]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[VMAXV_I]]
;
entry:
@@ -156,16 +125,12 @@ define <8 x i8> @test_vmaxv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A2]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3
; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i8> [[TMP7]]
@@ -184,16 +149,12 @@ define <4 x i16> @test_vmaxv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A2]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3
; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP7]]
@@ -212,15 +173,9 @@ define <2 x i32> @test_vmaxv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A2]])
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1
; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP6]]
@@ -238,16 +193,12 @@ define <16 x i8> @test_vmaxvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A2]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3
; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[TMP7]]
@@ -266,16 +217,12 @@ define <8 x i16> @test_vmaxvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) #
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A2]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3
; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP7]]
@@ -294,15 +241,9 @@ define <4 x i32> @test_vmaxvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) #
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]])
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A2]])
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3
; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
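
The NEON tests above exercise the AllowShadowCast=true path: smaxv returns an i32 even when the lanes are i8 or i16, so the OR-reduced lane shadow is widened to the i32 shadow of the result, which is why the updated CHECK lines contain llvm.vector.reduce.or followed by a zext. A rough sketch of that shape for the uaddv example cited in the source comment; the function name and TLS declarations are illustrative, not taken from the test suite:

; Sketch only; assumes the usual MSan parameter/retval TLS slots.
@__msan_param_tls = external thread_local global [100 x i64]
@__msan_retval_tls = external thread_local global [100 x i64]

declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)

define i32 @uaddv_across(<16 x i8> %v) sanitize_memory {
entry:
  %s_v = load <16 x i8>, ptr @__msan_param_tls, align 8
  ; OR-reduce the lane shadows, then zero-extend to match the i32 return
  ; type (the CreateShadowCast taken when AllowShadowCast is true)
  %s_or = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %s_v)
  %s_r = zext i8 %s_or to i32
  %r = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %v)
  store i32 %s_r, ptr @__msan_retval_tls, align 8
  ret i32 %r
}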