From 0941a9ee59ef0b7508ecd2602e6837111228f5bb Mon Sep 17 00:00:00 2001
From: Thurston Dang
Date: Wed, 5 Mar 2025 04:26:11 +0000
Subject: [PATCH 1/2] [msan] Apply handleVectorReduceIntrinsic to max/min vector instructions

Changes the handling of:
- llvm.aarch64.neon.smaxv
- llvm.aarch64.neon.sminv
- llvm.aarch64.neon.umaxv
- llvm.aarch64.neon.uminv
- llvm.vector.reduce.smax
- llvm.vector.reduce.smin
- llvm.vector.reduce.umax
- llvm.vector.reduce.umin
- llvm.vector.reduce.fmax
- llvm.vector.reduce.fmin
from the default strict handling (visitInstruction) to
handleVectorReduceIntrinsic.

Updates the tests from https://github.com/llvm/llvm-project/pull/129741,
https://github.com/llvm/llvm-project/pull/129810,
https://github.com/llvm/llvm-project/pull/129768
---
 .../Instrumentation/MemorySanitizer.cpp      |  64 +++--
 .../MemorySanitizer/AArch64/arm64-smaxv.ll   | 139 ++++-------
 .../MemorySanitizer/AArch64/arm64-sminv.ll   | 139 ++++-------
 .../MemorySanitizer/AArch64/arm64-umaxv.ll   | 219 ++++++++----------
 .../MemorySanitizer/AArch64/arm64-uminv.ll   | 219 ++++++++----------
 .../expand-experimental-reductions.ll        |  71 +-----
 6 files changed, 320 insertions(+), 531 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 52e42932fc751..fdf20218635f0 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3653,14 +3653,18 @@ struct MemorySanitizerVisitor : public InstVisitor {
   // Instrument generic vector reduction intrinsics
   // by ORing together all their fields.
   //
-  // The return type does not need to be the same type as the fields
+  // If AllowShadowCast is true, the return type does not need to be the same
+  // type as the fields
   // e.g., declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
-  void handleVectorReduceIntrinsic(IntrinsicInst &I) {
+  void handleVectorReduceIntrinsic(IntrinsicInst &I, bool AllowShadowCast) {
     assert(I.arg_size() == 1);
     IRBuilder<> IRB(&I);
     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
-    S = CreateShadowCast(IRB, S, getShadowTy(&I));
+    if (AllowShadowCast)
+      S = CreateShadowCast(IRB, S, getShadowTy(&I));
+    else
+      assert(S->getType() == getShadowTy(&I));
     setShadow(&I, S);
     setOriginForNaryOp(I);
   }
@@ -3669,13 +3673,18 @@ struct MemorySanitizerVisitor : public InstVisitor {
   // e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float>
   // %a1)
   // shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
+  //
+  // The type of the return value, initial starting value, and elements of the
+  // vector must be identical.
   void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
     assert(I.arg_size() == 2);
     IRBuilder<> IRB(&I);
     Value *Shadow0 = getShadow(&I, 0);
     Value *Shadow1 = IRB.CreateOrReduce(getShadow(&I, 1));
+    assert(Shadow0->getType() == Shadow1->getType());
     Value *S = IRB.CreateOr(Shadow0, Shadow1);
+    assert(S->getType() == getShadowTy(&I));
     setShadow(&I, S);
     setOriginForNaryOp(I);
   }
@@ -4458,21 +4467,17 @@ struct MemorySanitizerVisitor : public InstVisitor {
     case Intrinsic::vector_reduce_add:
     case Intrinsic::vector_reduce_xor:
     case Intrinsic::vector_reduce_mul:
-    // Add reduction to scalar
-    case Intrinsic::aarch64_neon_faddv:
-    case Intrinsic::aarch64_neon_saddv:
-    case Intrinsic::aarch64_neon_uaddv:
-    // Floating-point min/max (vector)
-    // The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
-    // but our shadow propagation is the same.
- case Intrinsic::aarch64_neon_fmaxv: - case Intrinsic::aarch64_neon_fminv: - case Intrinsic::aarch64_neon_fmaxnmv: - case Intrinsic::aarch64_neon_fminnmv: - // Sum long across vector - case Intrinsic::aarch64_neon_saddlv: - case Intrinsic::aarch64_neon_uaddlv: - handleVectorReduceIntrinsic(I); + // Signed/Unsigned Min/Max + // TODO: handling similarly to AND/OR may be more precise. + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: + // TODO: this has no false positives, but arguably we should check that all + // the bits are initialized. + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: + handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/false); break; case Intrinsic::vector_reduce_fadd: @@ -4903,6 +4908,29 @@ struct MemorySanitizerVisitor : public InstVisitor { break; } + // Add reduction to scalar + case Intrinsic::aarch64_neon_faddv: + case Intrinsic::aarch64_neon_saddv: + case Intrinsic::aarch64_neon_uaddv: + // Signed/Unsigned min/max (Vector) + // TODO: handling similarly to AND/OR may be more precise. + case Intrinsic::aarch64_neon_smaxv: + case Intrinsic::aarch64_neon_sminv: + case Intrinsic::aarch64_neon_umaxv: + case Intrinsic::aarch64_neon_uminv: + // Floating-point min/max (vector) + // The f{min,max}"nm"v variants handle NaN differently than f{min,max}v, + // but our shadow propagation is the same. + case Intrinsic::aarch64_neon_fmaxv: + case Intrinsic::aarch64_neon_fminv: + case Intrinsic::aarch64_neon_fmaxnmv: + case Intrinsic::aarch64_neon_fminnmv: + // Sum long across vector + case Intrinsic::aarch64_neon_saddlv: + case Intrinsic::aarch64_neon_uaddlv: + handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true); + break; + // Saturating extract narrow case Intrinsic::aarch64_neon_sqxtn: case Intrinsic::aarch64_neon_sqxtun: diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll index cbbd55d3e3497..632268e08022c 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll @@ -2,9 +2,6 @@ ; RUN: opt < %s -passes=msan -S | FileCheck %s ; ; Forked from llvm/test/CodeGen/AArch64/arm64-uminv.ll -; -; Handled suboptimally (visitInstruction): -; - llvm.aarch64.neon.smaxv target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android9001" @@ -15,16 +12,12 @@ define signext i8 @test_vmaxv_s8(<8 x i8> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A1]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8 -; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i8 [[_MSPROP]], 
ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i8 [[TMP4]] ; entry: @@ -39,16 +32,12 @@ define signext i16 @test_vmaxv_s16(<4 x i16> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A1]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16 -; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i16 [[TMP4]] ; entry: @@ -63,15 +52,9 @@ define i32 @test_vmaxv_s32(<2 x i32> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A1]]) -; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[VMAXV_I]] ; entry: @@ -85,16 +68,12 @@ define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A1]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8 -; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i8 [[TMP4]] ; entry: @@ -109,16 +88,12 @@ define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], 
!prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A1]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16 -; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i16 [[TMP4]] ; entry: @@ -133,15 +108,9 @@ define i32 @test_vmaxvq_s32(<4 x i32> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A1]]) -; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[VMAXV_I]] ; entry: @@ -156,16 +125,12 @@ define <8 x i8> @test_vmaxv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3 ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP7]] @@ -184,16 +149,12 @@ define <4 x i16> @test_vmaxv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: 
[[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3 ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP7]] @@ -212,15 +173,9 @@ define <2 x i32> @test_vmaxv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A2]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1 ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP6]] @@ -238,16 +193,12 @@ define <16 x i8> @test_vmaxvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3 ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP7]] @@ -266,16 +217,12 @@ define <8 x i16> @test_vmaxvq_s16_used_by_laneop(<8 x 
i16> %a1, <8 x i16> %a2) # ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3 ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP7]] @@ -294,15 +241,9 @@ define <4 x i32> @test_vmaxvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) # ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A2]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP6]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-sminv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-sminv.ll index b4f3cce44c306..267061027cd52 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-sminv.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-sminv.ll @@ -2,9 +2,6 @@ ; RUN: opt < %s -passes=msan -S | FileCheck %s ; ; Forked from llvm/test/CodeGen/AArch64/arm64-uminv.ll -; -; Handled suboptimally (visitInstruction): -; - llvm.aarch64.neon.sminv target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android9001" @@ -15,16 +12,12 @@ define signext i8 @test_vminv_s8(<8 x i8> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 
0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> [[A1]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMINV_I]] to i8 -; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i8 [[TMP4]] ; entry: @@ -39,16 +32,12 @@ define signext i16 @test_vminv_s16(<4 x i16> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> [[A1]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMINV_I]] to i16 -; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i16 [[TMP4]] ; entry: @@ -63,15 +52,9 @@ define i32 @test_vminv_s32(<2 x i32> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[A1]]) -; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[VMINV_I]] ; entry: @@ -85,16 +68,12 @@ define signext i8 @test_vminvq_s8(<16 x i8> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> [[A1]]) +; CHECK-NEXT: 
[[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMINV_I]] to i8 -; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i8 [[TMP4]] ; entry: @@ -109,16 +88,12 @@ define signext i16 @test_vminvq_s16(<8 x i16> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[A1]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMINV_I]] to i16 -; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i16 [[TMP4]] ; entry: @@ -133,15 +108,9 @@ define i32 @test_vminvq_s32(<4 x i32> %a1) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) ; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[A1]]) -; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[VMINV_I]] ; entry: @@ -156,16 +125,12 @@ define <8 x i8> @test_vminv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3 ; 
CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP7]] @@ -184,16 +149,12 @@ define <4 x i16> @test_vminv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3 ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP7]] @@ -212,15 +173,9 @@ define <2 x i32> @test_vminv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[A2]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1 ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP6]] @@ -238,16 +193,12 @@ define <16 x i8> @test_vminvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]]) +; CHECK-NEXT: 
[[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3 ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP7]] @@ -266,16 +217,12 @@ define <8 x i16> @test_vminvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) # ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3 ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP7]] @@ -294,15 +241,9 @@ define <4 x i32> @test_vminvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) # ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[A2]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP6]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll index a2af44e1862d1..0a58bdc3d6d0b 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll +++ 
b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll @@ -2,14 +2,6 @@ ; RUN: opt < %s -passes=msan -S | FileCheck %s ; ; Forked from llvm/test/CodeGen/AArch64/arm64-umaxv.ll -; -; Handled suboptimally (visitInstruction): -; - llvm.aarch64.neon.umaxv.i32.v16i8 -; - llvm.aarch64.neon.umaxv.i32.v2i32 -; - llvm.aarch64.neon.umaxv.i32.v4i16 -; - llvm.aarch64.neon.umaxv.i32.v4i32 -; - llvm.aarch64.neon.umaxv.i32.v8i16 -; - llvm.aarch64.neon.umaxv.i32.v8i8 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android9001" @@ -20,25 +12,24 @@ define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: -; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> [[A]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP3]]) +; CHECK-NEXT: [[VMAXV_I:%.*]] = zext i8 [[TMP7]] to i32 +; CHECK-NEXT: [[VMAXV_I1:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> [[A]]) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMAXV_I]] to i8 -; CHECK-NEXT: [[TMP0:%.*]] = xor i8 [[TMP]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and i8 -1, [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[VMAXV_I1]] to i8 +; CHECK-NEXT: [[TMP9:%.*]] = xor i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = or i8 [[TMP]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = xor i8 [[TMP4]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 -; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP]], 0 -; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB9]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK: [[BB10]]: ; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] ; CHECK: [[IF_THEN]]: ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 @@ -46,8 +37,8 @@ define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp #0 { ; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: br label %[[RETURN]] ; CHECK: [[RETURN]]: -; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] +; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] ; CHECK-NEXT: store i32 [[_MSPHI_S]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; @@ -74,25 +65,24 @@ define 
i32 @vmax_u4x16(<4 x i16> %a) nounwind ssp #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: -; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]]) +; CHECK-NEXT: [[VMAXV_I:%.*]] = zext i16 [[TMP7]] to i32 +; CHECK-NEXT: [[VMAXV_I1:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[A]]) #[[ATTR3]] ; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMAXV_I]] to i16 -; CHECK-NEXT: [[TMP0:%.*]] = xor i16 [[TMP]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and i16 -1, [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[VMAXV_I1]] to i16 +; CHECK-NEXT: [[TMP9:%.*]] = xor i16 [[TMP8]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = or i16 [[TMP]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = xor i16 [[TMP4]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 -; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP]], 0 -; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] +; CHECK: [[BB9]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK: [[BB10]]: ; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] ; CHECK: [[IF_THEN]]: ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 @@ -100,8 +90,8 @@ define i32 @vmax_u4x16(<4 x i16> %a) nounwind ssp #0 { ; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: br label %[[RETURN]] ; CHECK: [[RETURN]]: -; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] +; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] ; CHECK-NEXT: store i32 [[_MSPHI_S]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; @@ -126,25 +116,24 @@ define i32 @vmax_u8x16(<8 x i16> %a) nounwind ssp #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: -; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> 
[[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP3]]) +; CHECK-NEXT: [[VMAXV_I:%.*]] = zext i16 [[TMP7]] to i32 +; CHECK-NEXT: [[VMAXV_I1:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[A]]) #[[ATTR3]] ; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMAXV_I]] to i16 -; CHECK-NEXT: [[TMP0:%.*]] = xor i16 [[TMP]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and i16 -1, [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[VMAXV_I1]] to i16 +; CHECK-NEXT: [[TMP9:%.*]] = xor i16 [[TMP8]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = or i16 [[TMP]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = xor i16 [[TMP4]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 -; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP]], 0 -; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] +; CHECK: [[BB9]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK: [[BB10]]: ; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] ; CHECK: [[IF_THEN]]: ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 @@ -152,8 +141,8 @@ define i32 @vmax_u8x16(<8 x i16> %a) nounwind ssp #0 { ; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: br label %[[RETURN]] ; CHECK: [[RETURN]]: -; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] +; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] ; CHECK-NEXT: store i32 [[_MSPHI_S]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; @@ -178,25 +167,24 @@ define i32 @vmax_u16x8(<16 x i8> %a) nounwind ssp #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: -; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP3]]) +; CHECK-NEXT: [[VMAXV_I:%.*]] = zext i8 [[TMP7]] to i32 +; CHECK-NEXT: [[VMAXV_I1:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A]]) #[[ATTR3]] ; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMAXV_I]] to i8 -; CHECK-NEXT: [[TMP0:%.*]] = xor i8 [[TMP]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and i8 -1, [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[VMAXV_I1]] to i8 +; CHECK-NEXT: [[TMP9:%.*]] = xor i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = or i8 [[TMP]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i8 
[[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = xor i8 [[TMP4]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 -; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP]], 0 -; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] +; CHECK: [[BB9]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK: [[BB10]]: ; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] ; CHECK: [[IF_THEN]]: ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 @@ -204,8 +192,8 @@ define i32 @vmax_u16x8(<16 x i8> %a) nounwind ssp #0 { ; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: br label %[[RETURN]] ; CHECK: [[RETURN]]: -; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] +; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] ; CHECK-NEXT: store i32 [[_MSPHI_S]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; @@ -231,16 +219,12 @@ define <8 x i8> @test_vmaxv_u8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3 ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP7]] @@ -259,16 +243,12 @@ define <4 x i16> @test_vmaxv_u16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label 
%[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3 ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP7]] @@ -287,15 +267,9 @@ define <2 x i32> @test_vmaxv_u32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[A2]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1 ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP6]] @@ -313,16 +287,12 @@ define <16 x i8> @test_vmaxvq_u8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3 ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP7]] @@ -341,16 
+311,12 @@ define <8 x i16> @test_vmaxvq_u16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) # ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3 ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP7]] @@ -369,15 +335,9 @@ define <4 x i32> @test_vmaxvq_u32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) # ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[A2]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP6]] @@ -396,3 +356,6 @@ declare i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32>) nounwind readnone declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>) nounwind readnone attributes #0 = { sanitize_memory } +;. +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +;. 
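To make the regenerated CHECK lines easier to follow, here is a minimal standalone sketch of the shadow computation that handleVectorReduceIntrinsic now produces for these across-vector min/max reductions; the function name @sketch_umaxv_shadow is hypothetical and for illustration only, mirroring the pattern visible in the updated tests above. The per-element shadows are OR-reduced, and, because the NEON intrinsics return i32, the AllowShadowCast=true path widens the reduced shadow to the scalar shadow type. The uminv test updates below follow the same pattern.

define i32 @sketch_umaxv_shadow(<8 x i8> %elem_shadows) {
  ; OR together the shadow of every vector element, so any uninitialized
  ; element bits make the scalar result's shadow non-zero.
  %or = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %elem_shadows)
  ; Widen to match the i32 result type of llvm.aarch64.neon.umaxv
  ; (the CreateShadowCast step taken when AllowShadowCast is true).
  %s = zext i8 %or to i32
  ret i32 %s
}

declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)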
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll index 67038f495be7b..d85819aa95589 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll @@ -2,14 +2,6 @@ ; RUN: opt < %s -passes=msan -S | FileCheck %s ; ; Forked from llvm/test/CodeGen/AArch64/arm64-uminv.ll -; -; Handled suboptimally (visitInstruction): -; - llvm.aarch64.neon.uminv.i32.v16i8 -; - llvm.aarch64.neon.uminv.i32.v2i32 -; - llvm.aarch64.neon.uminv.i32.v4i16 -; - llvm.aarch64.neon.uminv.i32.v4i32 -; - llvm.aarch64.neon.uminv.i32.v8i16 -; - llvm.aarch64.neon.uminv.i32.v8i8 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android9001" @@ -20,25 +12,24 @@ define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: -; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> [[A]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP3]]) +; CHECK-NEXT: [[VMINV_I:%.*]] = zext i8 [[TMP7]] to i32 +; CHECK-NEXT: [[VMINV_I1:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> [[A]]) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMINV_I]] to i8 -; CHECK-NEXT: [[TMP0:%.*]] = xor i8 [[TMP]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and i8 -1, [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[VMINV_I1]] to i8 +; CHECK-NEXT: [[TMP9:%.*]] = xor i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = or i8 [[TMP]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = xor i8 [[TMP4]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 -; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP]], 0 -; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB9]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK: [[BB10]]: ; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] ; CHECK: [[IF_THEN]]: ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 @@ -46,8 +37,8 @@ define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp #0 { ; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: br label %[[RETURN]] ; CHECK: [[RETURN]]: -; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] +; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ 
[[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] ; CHECK-NEXT: store i32 [[_MSPHI_S]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; @@ -74,25 +65,24 @@ define i32 @vmin_u4x16(<4 x i16> %a) nounwind ssp #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: -; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]]) +; CHECK-NEXT: [[VMINV_I:%.*]] = zext i16 [[TMP7]] to i32 +; CHECK-NEXT: [[VMINV_I1:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[A]]) #[[ATTR3]] ; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMINV_I]] to i16 -; CHECK-NEXT: [[TMP0:%.*]] = xor i16 [[TMP]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and i16 -1, [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[VMINV_I1]] to i16 +; CHECK-NEXT: [[TMP9:%.*]] = xor i16 [[TMP8]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = or i16 [[TMP]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = xor i16 [[TMP4]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 -; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP]], 0 -; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] +; CHECK: [[BB9]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK: [[BB10]]: ; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] ; CHECK: [[IF_THEN]]: ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 @@ -100,8 +90,8 @@ define i32 @vmin_u4x16(<4 x i16> %a) nounwind ssp #0 { ; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: br label %[[RETURN]] ; CHECK: [[RETURN]]: -; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] +; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] ; CHECK-NEXT: store i32 [[_MSPHI_S]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; @@ -126,25 +116,24 @@ define i32 @vmin_u8x16(<8 x i16> %a) nounwind ssp #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label 
%[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: -; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP3]]) +; CHECK-NEXT: [[VMINV_I:%.*]] = zext i16 [[TMP7]] to i32 +; CHECK-NEXT: [[VMINV_I1:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A]]) #[[ATTR3]] ; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMINV_I]] to i16 -; CHECK-NEXT: [[TMP0:%.*]] = xor i16 [[TMP]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and i16 -1, [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[VMINV_I1]] to i16 +; CHECK-NEXT: [[TMP9:%.*]] = xor i16 [[TMP8]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = or i16 [[TMP]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = xor i16 [[TMP4]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 -; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP]], 0 -; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] +; CHECK: [[BB9]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK: [[BB10]]: ; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] ; CHECK: [[IF_THEN]]: ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 @@ -152,8 +141,8 @@ define i32 @vmin_u8x16(<8 x i16> %a) nounwind ssp #0 { ; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: br label %[[RETURN]] ; CHECK: [[RETURN]]: -; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] +; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] ; CHECK-NEXT: store i32 [[_MSPHI_S]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; @@ -178,25 +167,24 @@ define i32 @vmin_u16x8(<16 x i8> %a) nounwind ssp #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: -; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP3]]) +; CHECK-NEXT: [[VMINV_I:%.*]] = zext i8 [[TMP7]] to i32 +; CHECK-NEXT: [[VMINV_I1:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A]]) #[[ATTR3]] ; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMINV_I]] to i8 -; CHECK-NEXT: [[TMP0:%.*]] 
= xor i8 [[TMP]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and i8 -1, [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[VMINV_I1]] to i8 +; CHECK-NEXT: [[TMP9:%.*]] = xor i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = or i8 [[TMP]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = xor i8 [[TMP4]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 -; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP]], 0 -; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSPROP_ICMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] +; CHECK: [[BB9]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK: [[BB10]]: ; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] ; CHECK: [[IF_THEN]]: ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 @@ -204,8 +192,8 @@ define i32 @vmin_u16x8(<16 x i8> %a) nounwind ssp #0 { ; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: br label %[[RETURN]] ; CHECK: [[RETURN]]: -; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB8]] ] +; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi i32 [ [[_MSRET]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[BB10]] ] ; CHECK-NEXT: store i32 [[_MSPHI_S]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; @@ -231,16 +219,12 @@ define <8 x i8> @test_vminv_u8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3 ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP7]] @@ -259,16 +243,12 @@ define <4 x i16> @test_vminv_u16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: 
[[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3 ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP7]] @@ -287,15 +267,9 @@ define <2 x i32> @test_vminv_u32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[A2]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1 ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP6]] @@ -313,16 +287,12 @@ define <16 x i8> @test_vminvq_u8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = 
insertelement <16 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3 ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP7]] @@ -341,16 +311,12 @@ define <8 x i16> @test_vminvq_u16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) # ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A2]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3 ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP7]] @@ -369,15 +335,9 @@ define <4 x i32> @test_vminvq_u32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) # ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[A2]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP6]] @@ -395,3 +355,6 @@ declare i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32>) nounwind readnone declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>) nounwind readnone attributes #0 = { sanitize_memory } +;. +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +;. 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll b/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll index 7a4b433fc8be6..0696ac92e59b8 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll @@ -2,14 +2,6 @@ ; RUN: opt < %s -passes=msan -S | FileCheck %s ; ; Forked from llvm/test/CodeGen/Generic/expand-experimental-reductions.ll -; -; Handled suboptimally by visitInstruction: -; - llvm.vector.reduce.smax -; - llvm.vector.reduce.smin -; - llvm.vector.reduce.umax -; - llvm.vector.reduce.umin -; - llvm.vector.reduce.fmax -; - llvm.vector.reduce.fmin target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -268,15 +260,9 @@ define i64 @smax_i64(<2 x i64> %vec) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) ; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[VEC]]) -; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i64 [[TMP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[R]] ; entry: @@ -290,15 +276,9 @@ define i64 @smin_i64(<2 x i64> %vec) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) ; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> [[VEC]]) -; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i64 [[TMP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[R]] ; entry: @@ -312,15 +292,9 @@ define i64 @umax_i64(<2 x i64> %vec) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) ; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> [[VEC]]) -; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i64 [[TMP1]], ptr 
@__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[R]] ; entry: @@ -334,15 +308,9 @@ define i64 @umin_i64(<2 x i64> %vec) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) ; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[VEC]]) -; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i64 [[TMP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[R]] ; entry: @@ -357,15 +325,9 @@ define double @fmax_f64(<2 x double> %vec) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) ; CHECK-NEXT: [[R:%.*]] = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> [[VEC]]) -; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i64 [[TMP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret double [[R]] ; entry: @@ -380,15 +342,9 @@ define double @fmin_f64(<2 x double> %vec) #0 { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] -; CHECK: [[BB2]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) ; CHECK-NEXT: [[R:%.*]] = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> [[VEC]]) -; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i64 [[TMP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret double [[R]] ; entry: @@ -417,6 +373,3 @@ entry: } attributes #0 = { sanitize_memory } -;. -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} -;. 
From 42de9961e00019bc2930f9509a8d8820f0f46e2e Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Wed, 5 Mar 2025 04:33:03 +0000 Subject: [PATCH 2/2] Remove branch weights assertions --- .../Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll | 3 --- .../Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll | 3 --- 2 files changed, 6 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll index 0a58bdc3d6d0b..95f11a05d9d2d 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll @@ -356,6 +356,3 @@ declare i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32>) nounwind readnone declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>) nounwind readnone attributes #0 = { sanitize_memory } -;. -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} -;. diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll index d85819aa95589..ad51395691050 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll @@ -355,6 +355,3 @@ declare i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32>) nounwind readnone declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>) nounwind readnone attributes #0 = { sanitize_memory } -;. -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} -;.