From 13fd73932251843173cbbc31ca93905ca0469277 Mon Sep 17 00:00:00 2001 From: Ammarguellat Date: Mon, 12 Feb 2024 10:58:19 -0800 Subject: [PATCH 01/18] [CLANG] Full support of complex multiplication and division. --- clang/docs/UsersManual.rst | 32 +-- clang/include/clang/Basic/LangOptions.h | 27 ++- clang/include/clang/Driver/Options.td | 25 +-- clang/lib/CodeGen/CGExprComplex.cpp | 31 ++- clang/lib/Driver/ToolChains/Clang.cpp | 87 ++++---- clang/lib/Parse/ParsePragma.cpp | 4 +- clang/test/CodeGen/cx-complex-range.c | 222 +++++++++++-------- clang/test/CodeGen/pragma-cx-limited-range.c | 179 +++++++++++++-- clang/test/Driver/range.c | 103 ++++++--- 9 files changed, 475 insertions(+), 235 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 7391e4cf3a9ae..9ea5f89ece751 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1847,19 +1847,25 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``limited``, ``smith``, ``full`` and ``extend``. + + * ``limited`` Implementation of complex division and multiplication using + algebraic formulas at source precision. 
Overflow and non-finites values + are not handled. + * ``smith`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + Overflow is handled. + * ``full`` Implementation of complex division and multiplication using a + call to runtime library functions (generally the case, but the BE might + sometimes replace the library call if it knows enough about the potential + range of the inputs). Overflow and non-finite values are handled. + * ``extend`` Implementation of complex division using algebraic formulas at + higher precision. Overflow is handled. .. _floating-point-environment: diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 862952d336ef3..b8b96d0688829 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -396,7 +396,32 @@ class LangOptionsBase { IncompleteOnly = 3, }; - enum ComplexRangeKind { CX_Full, CX_Limited, CX_Fortran, CX_None }; + /// Controls the various implementations for complex multiplication and + // division. + enum ComplexRangeKind { + /// Implementation of complex division and multiplication using a call to + /// runtime library functions (generally the case, but the BE might + /// sometimes replace the library call if it knows enough about the + /// potential range of the inputs). Overflow and non-finite values are + /// handled. + CX_Full, + + /// Implementation of complex division using the Smith algorithm at source + /// precision. Overflow is handled. + CX_Smith, + + /// Implementation of complex division using algebraic formulas at higher + /// precision. Overflow is handled. + CX_Extend, + + /// Implementation of complex division and multiplication using algebraic + /// formulas at source precision. Overflow and non-finites values are not + /// handled. 
+ CX_Limited, + + /// No range rule is enabled. + CX_None + }; // Define simple language options (with no accessors). #define LANGOPT(Name, Bits, Default, Description) unsigned Name : Bits; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 31e8571758bfc..848cc38188d86 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1012,28 +1012,15 @@ defm offload_uniform_block : BoolFOption<"offload-uniform-block", NegFlag, BothFlags<[], [ClangOption], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>; -def fcx_limited_range : Joined<["-"], "fcx-limited-range">, - Group, Visibility<[ClangOption, CC1Option]>, - HelpText<"Basic algebraic expansions of complex arithmetic operations " - "involving are enabled.">; - -def fno_cx_limited_range : Joined<["-"], "fno-cx-limited-range">, - Group, Visibility<[ClangOption, CC1Option]>, - HelpText<"Basic algebraic expansions of complex arithmetic operations " - "involving are disabled.">; - -def fcx_fortran_rules : Joined<["-"], "fcx-fortran-rules">, - Group, Visibility<[ClangOption, CC1Option]>, - HelpText<"Range reduction is enabled for complex arithmetic operations.">; - -def fno_cx_fortran_rules : Joined<["-"], "fno-cx-fortran-rules">, - Group, Visibility<[ClangOption, CC1Option]>, - HelpText<"Range reduction is disabled for complex arithmetic operations.">; +def fcomplex_arithmetic_EQ : Joined<["-"], "fcomplex-arithmetic=">, Group, + Visibility<[ClangOption, CC1Option]>, + Values<"full,smith,extend,limited">, NormalizedValuesScope<"LangOptions">, + NormalizedValues<["CX_Full", "CX_Smith", "CX_Extend", "CX_Limited"]>; def complex_range_EQ : Joined<["-"], "complex-range=">, Group, Visibility<[CC1Option]>, - Values<"full,limited,fortran">, NormalizedValuesScope<"LangOptions">, - NormalizedValues<["CX_Full", "CX_Limited", "CX_Fortran"]>, + Values<"full,smith,extend,limited">, 
NormalizedValuesScope<"LangOptions">, + NormalizedValues<["CX_Full", "CX_Smith", "CX_Extend", "CX_Limited"]>, MarshallingInfoEnum, "CX_Full">; // OpenCL-only Options diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 9ddf0e763f139..e01be6e5fdd6e 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -283,9 +283,23 @@ class ComplexExprEmitter ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName, const BinOpInfo &Op); - QualType getPromotionType(QualType Ty) { + QualType getPromotionType(QualType Ty, bool IsDivOpCode = false) { if (auto *CT = Ty->getAs()) { QualType ElementType = CT->getElementType(); + if (CGF.getLangOpts().getComplexRange() == + LangOptions::ComplexRangeKind::CX_Extend && + IsDivOpCode) { + if (ElementType->isFloatingType()) { + if (const auto *BT = dyn_cast(ElementType)) + switch (BT->getKind()) { + case BuiltinType::Kind::Float: + return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); + default: + return CGF.getContext().getComplexType( + CGF.getContext().LongDoubleTy); + } + } + } if (ElementType.UseExcessPrecision(CGF.getContext())) return CGF.getContext().getComplexType(CGF.getContext().FloatTy); } @@ -296,11 +310,12 @@ class ComplexExprEmitter #define HANDLEBINOP(OP) \ ComplexPairTy VisitBin##OP(const BinaryOperator *E) { \ - QualType promotionTy = getPromotionType(E->getType()); \ + QualType promotionTy = getPromotionType( \ + E->getType(), \ + (E->getOpcode() == BinaryOperatorKind::BO_Div) ? 
true : false); \ ComplexPairTy result = EmitBin##OP(EmitBinOps(E, promotionTy)); \ if (!promotionTy.isNull()) \ - result = \ - CGF.EmitUnPromotedValue(result, E->getType()); \ + result = CGF.EmitUnPromotedValue(result, E->getType()); \ return result; \ } @@ -790,7 +805,8 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { ResI = Builder.CreateFAdd(AD, BC, "mul_i"); if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited || - Op.FPFeatures.getComplexRange() == LangOptions::CX_Fortran) + Op.FPFeatures.getComplexRange() == LangOptions::CX_Smith || + Op.FPFeatures.getComplexRange() == LangOptions::CX_Extend) return ComplexPairTy(ResR, ResI); // Emit the test for the real part becoming NaN and create a branch to @@ -981,9 +997,10 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { llvm::Value *OrigLHSi = LHSi; if (!LHSi) LHSi = llvm::Constant::getNullValue(RHSi->getType()); - if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Fortran) + if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Smith) return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi); - else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited) + else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited || + Op.FPFeatures.getComplexRange() == LangOptions::CX_Extend) return EmitAlgebraicDiv(LHSr, LHSi, RHSr, RHSi); else if (!CGF.getLangOpts().FastMath || // '-ffast-math' is used in the command line but followed by an diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 942ebbc410607..7f4cb46a44f4c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2687,45 +2687,42 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, } } -static StringRef EnumComplexRangeToStr(LangOptions::ComplexRangeKind Range) { - StringRef RangeStr = ""; +static std::string ComplexRangeKindToStr(LangOptions::ComplexRangeKind Range) { switch (Range) { + case 
LangOptions::ComplexRangeKind::CX_Full: + return "full"; + break; case LangOptions::ComplexRangeKind::CX_Limited: - return "-fcx-limited-range"; + return "limited"; break; - case LangOptions::ComplexRangeKind::CX_Fortran: - return "-fcx-fortran-rules"; + case LangOptions::ComplexRangeKind::CX_Smith: + return "smith"; break; - default: - return RangeStr; + case LangOptions::ComplexRangeKind::CX_Extend: + return "extend"; break; + default: + return ""; } } +static std::string ComplexArithmeticStr(LangOptions::ComplexRangeKind Range) { + return "-fcomplex-arithmetic=" + ComplexRangeKindToStr(Range); +} + static void EmitComplexRangeDiag(const Driver &D, LangOptions::ComplexRangeKind Range1, LangOptions::ComplexRangeKind Range2) { if (Range1 != Range2 && Range1 != LangOptions::ComplexRangeKind::CX_None) D.Diag(clang::diag::warn_drv_overriding_option) - << EnumComplexRangeToStr(Range1) << EnumComplexRangeToStr(Range2); + << ComplexArithmeticStr(Range1) << ComplexArithmeticStr(Range2); } static std::string RenderComplexRangeOption(LangOptions::ComplexRangeKind Range) { - std::string ComplexRangeStr = "-complex-range="; - switch (Range) { - case LangOptions::ComplexRangeKind::CX_Full: - ComplexRangeStr += "full"; - break; - case LangOptions::ComplexRangeKind::CX_Limited: - ComplexRangeStr += "limited"; - break; - case LangOptions::ComplexRangeKind::CX_Fortran: - ComplexRangeStr += "fortran"; - break; - default: - assert(0 && "Unexpected range option"); - } + std::string ComplexRangeStr = ComplexRangeKindToStr(Range); + if (!ComplexRangeStr.empty()) + return "-complex-range=" + ComplexRangeStr; return ComplexRangeStr; } @@ -2789,24 +2786,24 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, switch (optID) { default: break; - case options::OPT_fcx_limited_range: { - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Limited); - Range = LangOptions::ComplexRangeKind::CX_Limited; - break; - } - case options::OPT_fno_cx_limited_range: 
- EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full); - Range = LangOptions::ComplexRangeKind::CX_Full; - break; - case options::OPT_fcx_fortran_rules: { - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Fortran); - Range = LangOptions::ComplexRangeKind::CX_Fortran; + case options::OPT_fcomplex_arithmetic_EQ: { + LangOptions::ComplexRangeKind RangeVal; + StringRef Val = A->getValue(); + if (Val.equals("full")) + RangeVal = LangOptions::ComplexRangeKind::CX_Full; + else if (Val.equals("smith")) + RangeVal = LangOptions::ComplexRangeKind::CX_Smith; + else if (Val.equals("extend")) + RangeVal = LangOptions::ComplexRangeKind::CX_Extend; + else if (Val.equals("limited")) + RangeVal = LangOptions::ComplexRangeKind::CX_Limited; + else + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getSpelling() << LangOptions::ComplexRangeKind::CX_None; + EmitComplexRangeDiag(D, Range, RangeVal); + Range = RangeVal; break; } - case options::OPT_fno_cx_fortran_rules: - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full); - Range = LangOptions::ComplexRangeKind::CX_Full; - break; case options::OPT_ffp_model_EQ: { // If -ffp-model= is seen, reset to fno-fast-math HonorINFs = true; @@ -3235,16 +3232,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, if (Range != LangOptions::ComplexRangeKind::CX_None) ComplexRangeStr = RenderComplexRangeOption(Range); - if (!ComplexRangeStr.empty()) + if (!ComplexRangeStr.empty()) { CmdArgs.push_back(Args.MakeArgString(ComplexRangeStr)); - if (Args.hasArg(options::OPT_fcx_limited_range)) - CmdArgs.push_back("-fcx-limited-range"); - if (Args.hasArg(options::OPT_fcx_fortran_rules)) - CmdArgs.push_back("-fcx-fortran-rules"); - if (Args.hasArg(options::OPT_fno_cx_limited_range)) - CmdArgs.push_back("-fno-cx-limited-range"); - if (Args.hasArg(options::OPT_fno_cx_fortran_rules)) - CmdArgs.push_back("-fno-cx-fortran-rules"); + if 
(Args.hasArg(options::OPT_fcomplex_arithmetic_EQ)) + CmdArgs.push_back(Args.MakeArgString("-fcomplex-arithmetic=" + + ComplexRangeKindToStr(Range))); + } } static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs, diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index 730ac1a0fee5c..dc938e4e4ced1 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -916,8 +916,8 @@ void Parser::HandlePragmaCXLimitedRange() { break; case tok::OOS_DEFAULT: // According to ISO C99 standard chapter 7.3.4, the default value - // for the pragma is ``off'. -fcx-limited-range and -fcx-fortran-rules - // control the default value of these pragmas. + // for the pragma is ``off'. -fcomplex-arithmetic controls the default value + // of these pragmas. Range = getLangOpts().getComplexRange(); break; } diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index 2d8507c710f20..f78fda61a9d1c 100644 --- a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -5,120 +5,156 @@ // RUN: -complex-range=limited -o - | FileCheck %s --check-prefix=LMTD // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -fno-cx-limited-range -o - | FileCheck %s --check-prefix=FULL +// RUN: -complex-range=smith -o - | FileCheck %s --check-prefix=SMITH // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -complex-range=fortran -o - | FileCheck %s --check-prefix=FRTRN +// RUN: -complex-range=extend -o - | FileCheck %s --check-prefix=EXTND + +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -complex-range=full -o - | FileCheck %s --check-prefix=FULL // Fast math // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ // RUN: -ffast-math -complex-range=limited -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=LMTD-FAST +// RUN: | FileCheck %s --check-prefix=LMTD // RUN: %clang_cc1 -triple 
x86_64-unknown-linux-gnu \ // RUN: -ffast-math -complex-range=full -emit-llvm -o - %s \ // RUN: | FileCheck %s --check-prefix=FULL -// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -fno-cx-fortran-rules -o - | FileCheck %s --check-prefix=FULL - -// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -fcx-limited-range -fno-cx-limited-range -o - \ -// RUN: | FileCheck %s --check-prefix=FULL - -// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -fno-cx-limited-range -fcx-limited-range -o - \ -// RUN: | FileCheck %s --check-prefix=FULL +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -ffast-math -complex-range=smith -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=SMITH -// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -fno-cx-fortran-rules -fcx-fortran-rules -o - \ -// RUN: | FileCheck %s --check-prefix=FULL +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -ffast-math -complex-range=extend -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=EXTND _Complex float div(_Complex float a, _Complex float b) { // LABEL: define {{.*}} @div( - // FULL: call {{.*}} @__divsc3 - - // LMTD: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fadd float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fadd float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fsub float - // LMTD-NEXT: fdiv float - // LMTD-NEXT: fdiv float - - // FRTRN: call {{.*}}float @llvm.fabs.f32(float {{.*}}) - // FRTRN-NEXT: call {{.*}}float @llvm.fabs.f32(float {{.*}}) - // FRTRN-NEXT: fcmp {{.*}}ugt float - // FRTRN-NEXT: br i1 {{.*}}, label - // FRTRN: abs_rhsr_greater_or_equal_abs_rhsi: - // FRTRN-NEXT: fdiv {{.*}}float - // FRTRN-NEXT: fmul {{.*}}float - // FRTRN-NEXT: fadd {{.*}}float - // FRTRN-NEXT: fmul {{.*}}float - // FRTRN-NEXT: fadd {{.*}}float - // FRTRN-NEXT: fdiv {{.*}}float - // 
FRTRN-NEXT: fmul {{.*}}float - // FRTRN-NEXT: fsub {{.*}}float - // FRTRN-NEXT: fdiv {{.*}}float - // FRTRN-NEXT: br label - // FRTRN: abs_rhsr_less_than_abs_rhsi: - // FRTRN-NEXT: fdiv {{.*}}float - // FRTRN-NEXT: fmul {{.*}}float - // FRTRN-NEXT: fadd {{.*}}float - // FRTRN-NEXT: fmul {{.*}}float - // FRTRN-NEXT: fadd {{.*}}float - // FRTRN-NEXT: fdiv {{.*}}float - // FRTRN-NEXT: fmul {{.*}}float - // FRTRN-NEXT: fsub {{.*}}float - // FRTRN-NEXT: fdiv {{.*}}float - // FRTRN-NEXT: br label - // FRTRN: complex_div: - // FRTRN-NEXT: phi {{.*}}float - // FRTRN-NEXT: phi {{.*}}float - - // LMTD-FAST: fmul {{.*}} float - // LMTD-FAST-NEXT: fmul {{.*}} float - // LMTD-FAST-NEXT: fadd {{.*}} float - // LMTD-FAST-NEXT: fmul {{.*}} float - // LMTD-FAST-NEXT: fmul {{.*}} float - // LMTD-FAST-NEXT: fadd {{.*}} float - // LMTD-FAST-NEXT: fmul {{.*}} float - // LMTD-FAST-NEXT: fmul {{.*}} float - // LMTD-FAST-NEXT: fsub {{.*}} float - // LMTD-FAST-NEXT: fdiv {{.*}} float - // LMTD-FAST-NEXT: fdiv {{.*}} float + // FULL: call {{.*}} @__divsc3 + // + // LMTD: fmul{{.*}}float + // LMTD-NEXT: fmul{{.*}}float + // LMTD-NEXT: fadd{{.*}}float + // LMTD-NEXT: fmul{{.*}}float + // LMTD-NEXT: fmul{{.*}}float + // LMTD-NEXT: fadd{{.*}}float + // LMTD-NEXT: fmul{{.*}}float + // LMTD-NEXT: fmul{{.*}}float + // LMTD-NEXT: fsub{{.*}}float + // LMTD-NEXT: fdiv{{.*}}float + // LMTD-NEXT: fdiv{{.*}}float + // + // SMITH: call{{.*}}float @llvm.fabs.f32(float {{.*}}) + // SMITH-NEXT: call{{.*}}float @llvm.fabs.f32(float {{.*}}) + // SMITH-NEXT: fcmp{{.*}}ugt float {{.*}}, {{.*}} + // SMITH-NEXT: br i1 {{.*}}, label + // SMITH: abs_rhsr_greater_or_equal_abs_rhsi: + // SMITH-NEXT: fdiv{{.*}}float + // SMITH-NEXT: fmul{{.*}}float + // SMITH-NEXT: fadd{{.*}}float + // SMITH-NEXT: fmul{{.*}}float + // SMITH-NEXT: fadd{{.*}}float + // SMITH-NEXT: fdiv{{.*}}float + // SMITH-NEXT: fmul{{.*}}float + // SMITH-NEXT: fsub{{.*}}float + // SMITH-NEXT: fdiv{{.*}}float + // SMITH-NEXT: br label + // SMITH: 
abs_rhsr_less_than_abs_rhsi: + // SMITH-NEXT: fdiv{{.*}}float + // SMITH-NEXT: fmul{{.*}}float + // SMITH-NEXT: fadd{{.*}}float + // SMITH-NEXT: fmul{{.*}}float + // SMITH-NEXT: fadd{{.*}}float + // SMITH-NEXT: fdiv{{.*}}float + // SMITH-NEXT: fmul{{.*}}float + // SMITH-NEXT: fsub{{.*}}float + // SMITH-NEXT: fdiv{{.*}}float + // + // EXTND: load float, ptr {{.*}} + // EXTND: fpext float {{.*}} to double + // EXTND-NEXT: fpext float {{.*}} to double + // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // EXTND-NEXT: load float, ptr {{.*}} + // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // EXTND-NEXT: load float, ptr {{.*}} + // EXTND-NEXT: fpext float {{.*}} to double + // EXTND-NEXT: fpext float {{.*}} to double + // EXTND-NEXT: fmul{{.*}}double + // EXTND-NEXT: fmul{{.*}}double + // EXTND-NEXT: fadd{{.*}}double + // EXTND-NEXT: fmul{{.*}}double + // EXTND-NEXT: fmul{{.*}}double + // EXTND-NEXT: fadd{{.*}}double + // EXTND-NEXT: fmul{{.*}}double + // EXTND-NEXT: fmul{{.*}}double + // EXTND-NEXT: fsub{{.*}}double + // EXTND-NEXT: fdiv{{.*}}double + // EXTND-NEXT: fdiv{{.*}}double + // EXTND-NEXT: fptrunc double {{.*}} to float + // EXTND-NEXT: fptrunc double {{.*}} to float return a / b; } _Complex float mul(_Complex float a, _Complex float b) { // LABEL: define {{.*}} @mul( - // FULL: call {{.*}} @__mulsc3 - - // LMTD: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fsub float - // LMTD-NEXT: fadd float - - // FRTRN: fmul {{.*}}float - // FRTRN-NEXT: fmul {{.*}}float - // FRTRN-NEXT: fmul {{.*}}float - // FRTRN-NEXT: fmul {{.*}}float - // FRTRN-NEXT: fsub {{.*}}float - // FRTRN-NEXT: fadd {{.*}}float - - // LMTD-FAST: fmul {{.*}} float - // LMTD-FAST-NEXT: fmul {{.*}} float - // LMTD-FAST-NEXT: fmul {{.*}} float - // LMTD-FAST-NEXT: fmul {{.*}} float - // LMTD-FAST-NEXT: fsub {{.*}} float - // LMTD-FAST-NEXT: fadd {{.*}} float + // FULL: 
call {{.*}} @__mulsc3 + // + // LMTD: alloca { float, float } + // LMTD-NEXT: alloca { float, float } + // LMTD-NEXT: alloca { float, float } + // LMTD: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // LMTD-NEXT: load float, ptr {{.*}} + // LMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // LMTD-NEXT: load float, ptr {{.*}} + // LMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // LMTD-NEXT: load float, ptr {{.*}} + // LMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // LMTD-NEXT: load float + // LMTD-NEXT: fmul{{.*}}float + // LMTD-NEXT: fmul{{.*}}float + // LMTD-NEXT: fmul{{.*}}float + // LMTD-NEXT: fmul{{.*}}float + // LMTD-NEXT: fsub{{.*}}float + // LMTD-NEXT: fadd{{.*}}float + // + // SMITH: alloca { float, float } + // SMITH-NEXT: alloca { float, float } + // SMITH-NEXT: alloca { float, float } + // SMITH: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // SMITH-NEXT: load float, ptr {{.*}} + // SMITH-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // SMITH-NEXT: load float, ptr {{.*}} + // SMITH-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // SMITH-NEXT: load float, ptr {{.*}} + // SMITH-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // SMITH-NEXT: load float + // SMITH-NEXT: fmul{{.*}}float + // SMITH-NEXT: fmul{{.*}}float + // SMITH-NEXT: fmul{{.*}}float + // SMITH-NEXT: fmul{{.*}}float + // SMITH-NEXT: fsub{{.*}}float + // SMITH-NEXT: fadd{{.*}}float + // + // EXTND: alloca { float, float } + // EXTND-NEXT: alloca { float, float } + // EXTND-NEXT: alloca { float, float } + // EXTND: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // EXTND-NEXT: load float, ptr + // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // EXTND-NEXT: load float, ptr {{.*}} + // EXTND-NEXT: getelementptr inbounds { 
float, float }, ptr {{.*}}, i32 0, i32 0 + // EXTND-NEXT: load float, ptr {{.*}} + // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // EXTND-NEXT: load{{.*}}float + // EXTND-NEXT: fmul{{.*}}float + // EXTND-NEXT: fmul{{.*}}float + // EXTND-NEXT: fmul{{.*}}float + // EXTND-NEXT: fmul{{.*}}float + // EXTND-NEXT: fsub{{.*}}float + // EXTND-NEXT: fadd{{.*}}float return a * b; } diff --git a/clang/test/CodeGen/pragma-cx-limited-range.c b/clang/test/CodeGen/pragma-cx-limited-range.c index 926da8afbee55..bb0576dd51b98 100644 --- a/clang/test/CodeGen/pragma-cx-limited-range.c +++ b/clang/test/CodeGen/pragma-cx-limited-range.c @@ -5,17 +5,18 @@ // RUN: -complex-range=limited -o - | FileCheck --check-prefix=LMTD %s // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -fno-cx-limited-range -o - | FileCheck %s --check-prefix=FULL +// RUN: -complex-range=smith -o - | FileCheck --check-prefix=SMITH %s // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -complex-range=fortran -o - | FileCheck --check-prefix=FRTRN %s +// RUN: -complex-range=extend -o - | FileCheck --check-prefix=EXTND %s // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -fno-cx-fortran-rules -o - | FileCheck --check-prefix=FULL %s +// RUN: -complex-range=full -o - | FileCheck --check-prefix=FULL %s _Complex float pragma_on_mul(_Complex float a, _Complex float b) { #pragma STDC CX_LIMITED_RANGE ON // LABEL: define {{.*}} @pragma_on_mul( + // FULL: fmul float // FULL-NEXT: fmul float // FULL-NEXT: fmul float @@ -30,12 +31,19 @@ _Complex float pragma_on_mul(_Complex float a, _Complex float b) { // LMTD-NEXT: fsub float // LMTD-NEXT: fadd float - // FRTRN: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fsub float - // FRTRN-NEXT: fadd float + // SMITH: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fmul 
float + // SMITH-NEXT: fsub float + // SMITH-NEXT: fadd float + + // EXTND: fmul float + // EXTND-NEXT: fmul float + // EXTND-NEXT: fmul float + // EXTND-NEXT: fmul float + // EXTND-NEXT: fsub float + // EXTND-NEXT: fadd float return a * b; } @@ -43,11 +51,14 @@ _Complex float pragma_on_mul(_Complex float a, _Complex float b) { _Complex float pragma_off_mul(_Complex float a, _Complex float b) { #pragma STDC CX_LIMITED_RANGE OFF // LABEL: define {{.*}} @pragma_off_mul( + // FULL: call {{.*}} @__mulsc3 // LMTD: call {{.*}} @__mulsc3 - // FRTRN: call {{.*}} @__mulsc3 + // SMITH: call {{.*}} @__mulsc3 + + // EXTND: call {{.*}} @__mulsc3 return a * b; } @@ -55,6 +66,7 @@ _Complex float pragma_off_mul(_Complex float a, _Complex float b) { _Complex float pragma_on_div(_Complex float a, _Complex float b) { #pragma STDC CX_LIMITED_RANGE ON // LABEL: define {{.*}} @pragma_on_div( + // FULL: fmul float // FULL-NEXT: fmul float // FULL-NEXT: fadd float @@ -79,17 +91,33 @@ _Complex float pragma_on_div(_Complex float a, _Complex float b) { // LMTD-NEXT: fdiv float // LMTD-NEXT: fdiv float - // FRTRN: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fadd float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fadd float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fsub float - // FRTRN-NEXT: fdiv float - // FRTRN-NEXT: fdiv float + // SMITH: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fadd float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fadd float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fsub float + // SMITH-NEXT: fdiv float + // SMITH-NEXT: fdiv float + + // EXTND: fpext float {{.*}} to double + // EXTND: fpext float {{.*}} to double + // EXTND: fmul double + // EXTND: fmul double + // EXTND: fadd double + // EXTND: fmul double + // EXTND: fmul double + // EXTND: fadd double + // EXTND: fmul double + // EXTND: fmul double + // EXTND: fsub double + 
// EXTND: fdiv double + // EXTND: fdiv double + // EXTND: fptrunc double + // EXTND: fptrunc double return a / b; } @@ -97,11 +125,118 @@ _Complex float pragma_on_div(_Complex float a, _Complex float b) { _Complex float pragma_off_div(_Complex float a, _Complex float b) { #pragma STDC CX_LIMITED_RANGE OFF // LABEL: define {{.*}} @pragma_off_div( + // FULL: call {{.*}} @__divsc3 // LMTD: call {{.*}} @__divsc3 - // FRTRN: call {{.*}} @__divsc3 + // SMITH: call {{.*}} @__divsc3 + + // EXTND: call {{.*}} @__divdc3 + + return a / b; +} + +_Complex float pragma_default_mul(_Complex float a, _Complex float b) { +#pragma STDC CX_LIMITED_RANGE DEFAULT + // LABEL: define {{.*}} @pragma_on_mul( + + // FULL: fmul float + // FULL-NEXT: fmul float + // FULL-NEXT: fmul float + // FULL-NEXT: fmul float + // FULL-NEXT: fsub float + // FULL-NEXT: fadd float + + // LMTD: fmul float + // LMTD-NEXT: fmul float + // LMTD-NEXT: fmul float + // LMTD-NEXT: fmul float + // LMTD-NEXT: fsub float + // LMTD-NEXT: fadd float + + // SMITH: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fsub float + // SMITH-NEXT: fadd float + + // EXTND: fmul float + // EXTND-NEXT: fmul float + // EXTND-NEXT: fmul float + // EXTND-NEXT: fmul float + // EXTND-NEXT: fsub float + // EXTND-NEXT: fadd float + + return a * b; +} +_Complex float pragma_default_div(_Complex float a, _Complex float b) { +#pragma STDC CX_LIMITED_RANGE DEFAULT + // LABEL: define {{.*}} @pragma_on_divx( + + // FULL: call {{.*}} @__divsc3 + + // LMTD: fmul float + // LMTD-NEXT: fmul float + // LMTD-NEXT: fadd float + // LMTD-NEXT: fmul float + // LMTD-NEXT: fmul float + // LMTD-NEXT: fadd float + // LMTD-NEXT: fmul float + // LMTD-NEXT: fmul float + // LMTD-NEXT: fsub float + // LMTD-NEXT: fdiv float + // LMTD-NEXT: fdiv float + + // SMITH: call{{.*}}float @llvm.fabs.f32(float {{.*}}) + // SMITH-NEXT: call{{.*}}float @llvm.fabs.f32(float {{.*}}) + // SMITH-NEXT: fcmp{{.*}}ugt 
float {{.*}}, {{.*}} + // SMITH-NEXT: br i1 {{.*}}, label + // SMITH: abs_rhsr_greater_or_equal_abs_rhsi: + // SMITH-NEXT: fdiv float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fadd float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fadd float + // SMITH-NEXT: fdiv float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fsub float + // SMITH-NEXT: fdiv float + // SMITH-NEXT: br label + // SMITH: abs_rhsr_less_than_abs_rhsi: + // SMITH-NEXT: fdiv float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fadd float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fadd float + // SMITH-NEXT: fdiv float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fsub float + // SMITH-NEXT: fdiv float + + // EXTND: load float, ptr {{.*}} + // EXTND: fpext float {{.*}} to double + // EXTND-NEXT: fpext float {{.*}} to double + // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // EXTND-NEXT: load float, ptr {{.*}} + // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // EXTND-NEXT: load float, ptr {{.*}} + // EXTND-NEXT: fpext float {{.*}} to double + // EXTND-NEXT: fpext float {{.*}} to double + // EXTND-NEXT: fmul double + // EXTND-NEXT: fmul double + // EXTND-NEXT: fadd double + // EXTND-NEXT: fmul double + // EXTND-NEXT: fmul double + // EXTND-NEXT: fadd double + // EXTND-NEXT: fmul double + // EXTND-NEXT: fmul double + // EXTND-NEXT: fsub double + // EXTND-NEXT: fdiv double + // EXTND-NEXT: fdiv double + // EXTND-NEXT: fptrunc double {{.*}} to float + // EXTND-NEXT: fptrunc double {{.*}} to float return a / b; } diff --git a/clang/test/Driver/range.c b/clang/test/Driver/range.c index 49116df2f4480..434e9407664ce 100644 --- a/clang/test/Driver/range.c +++ b/clang/test/Driver/range.c @@ -1,52 +1,93 @@ // Test range options for complex multiplication and division. 
-// RUN: %clang -### -target x86_64 -fcx-limited-range -c %s 2>&1 \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=limited -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=LMTD %s -// RUN: %clang -### -target x86_64 -fno-cx-limited-range -c %s 2>&1 \ -// RUN: | FileCheck %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=smith -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=SMITH %s -// RUN: %clang -### -target x86_64 -fcx-limited-range -fno-cx-limited-range \ -// RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=extend -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=EXTND %s -// RUN: %clang -### -target x86_64 -fcx-fortran-rules -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=FRTRN %s - -// RUN: %clang -### -target x86_64 -fno-cx-fortran-rules -c %s 2>&1 \ -// RUN: | FileCheck %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FULL %s -// RUN: %clang -### -target x86_64 -fcx-limited-range \ -// RUN: -fcx-fortran-rules -c %s 2>&1 \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=limited \ +// RUN: -fcomplex-arithmetic=smith -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN1 %s -// RUN: %clang -### -target x86_64 -fcx-fortran-rules \ -// RUN: -fcx-limited-range -c %s 2>&1 \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=limited \ +// RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN2 %s -// RUN: %clang -### -target x86_64 -ffast-math -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=LMTD %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=limited \ +// RUN: -fcomplex-arithmetic=extend -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN3 %s -// RUN: %clang -### -target x86_64 -ffast-math -fcx-limited-range -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=LMTD %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=smith \ +// RUN: -fcomplex-arithmetic=limited 
-c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN4 %s -// RUN: %clang -### -target x86_64 -fcx-limited-range -ffast-math -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=LMTD %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=smith \ +// RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN5 %s -// RUN: %clang -### -target x86_64 -ffast-math -fno-cx-limited-range -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=FULL %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=smith \ +// RUN: -fcomplex-arithmetic=extend -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN6 %s + + +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=extend \ +// RUN: -fcomplex-arithmetic=limited -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN7 %s + +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=extend \ +// RUN: -fcomplex-arithmetic=smith -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN8 %s -// RUN: %clang -### -Werror -target x86_64 -fcx-limited-range -c %s 2>&1 \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=extend \ +// RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN9 %s + + +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ +// RUN: -fcomplex-arithmetic=limited -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN10 %s + +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ +// RUN: -fcomplex-arithmetic=smith -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN11 %s + +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ +// RUN: -fcomplex-arithmetic=extend -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN12 %s + +// RUN: %clang -### -target x86_64 -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=LMTD %s + +// RUN: %clang -### -target x86_64 -ffast-math -fcomplex-arithmetic=limited -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=LMTD %s -// RUN: %clang -### -Werror -target x86_64 -fcx-fortran-rules -c %s 2>&1 \ -// 
RUN: | FileCheck --check-prefix=FRTRN %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=limited -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=LMTD %s // LMTD: -complex-range=limited // FULL: -complex-range=full -// LMTD-NOT: -complex-range=fortran +// EXTND: -complex-range=extend +// LMTD-NOT: -complex-range=smith // CHECK-NOT: -complex-range=limited -// FRTRN: -complex-range=fortran -// FRTRN-NOT: -complex-range=limited -// CHECK-NOT: -complex-range=fortran -// WARN1: warning: overriding '-fcx-limited-range' option with '-fcx-fortran-rules' [-Woverriding-option] -// WARN2: warning: overriding '-fcx-fortran-rules' option with '-fcx-limited-range' [-Woverriding-option] +// SMITH: -complex-range=smith +// SMITH-NOT: -complex-range=limited +// CHECK-NOT: -complex-range=smith +// WARN1: warning: overriding '-fcomplex-arithmetic=limited' option with '-fcomplex-arithmetic=smith' [-Woverriding-option] +// WARN2: warning: overriding '-fcomplex-arithmetic=limited' option with '-fcomplex-arithmetic=full' [-Woverriding-option] +// WARN3: warning: overriding '-fcomplex-arithmetic=limited' option with '-fcomplex-arithmetic=extend' [-Woverriding-option] +// WARN4: warning: overriding '-fcomplex-arithmetic=smith' option with '-fcomplex-arithmetic=limited' [-Woverriding-option] +// WARN5: warning: overriding '-fcomplex-arithmetic=smith' option with '-fcomplex-arithmetic=full' [-Woverriding-option] +// WARN6: warning: overriding '-fcomplex-arithmetic=smith' option with '-fcomplex-arithmetic=extend' [-Woverriding-option] +// WARN7: warning: overriding '-fcomplex-arithmetic=extend' option with '-fcomplex-arithmetic=limited' [-Woverriding-option] +// WARN8: warning: overriding '-fcomplex-arithmetic=extend' option with '-fcomplex-arithmetic=smith' [-Woverriding-option] +// WARN9: warning: overriding '-fcomplex-arithmetic=extend' option with '-fcomplex-arithmetic=full' [-Woverriding-option] +// WARN10: warning: overriding '-fcomplex-arithmetic=full' option with 
'-fcomplex-arithmetic=limited' [-Woverriding-option] +// WARN11: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=smith' [-Woverriding-option] +// WARN12: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=extend' [-Woverriding-option] From eb9a35c4b3c42b0caf3ddddce4759dd4b1b640a8 Mon Sep 17 00:00:00 2001 From: Ammarguellat Date: Tue, 20 Feb 2024 13:24:52 -0800 Subject: [PATCH 02/18] Changed the names of the values for the option and added some code to deal with the type promotion. --- clang/docs/UsersManual.rst | 26 +- clang/include/clang/Basic/LangOptions.h | 41 ++- clang/include/clang/Driver/Options.td | 8 +- clang/lib/CodeGen/CGExprComplex.cpp | 65 ++-- clang/lib/Driver/ToolChains/Clang.cpp | 26 +- clang/lib/Parse/ParsePragma.cpp | 2 +- clang/test/CodeGen/cx-complex-range.c | 236 +++++++-------- clang/test/CodeGen/pragma-cx-limited-range.c | 296 +++++++++---------- clang/test/Driver/range.c | 96 +++--- 9 files changed, 419 insertions(+), 377 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 9ea5f89ece751..732eaf4f2b14e 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1851,21 +1851,29 @@ floating point semantic models: precise (the default), strict, and fast. This option specifies the implementation for complex multiplication and division. - Valid values are: ``limited``, ``smith``, ``full`` and ``extend``. + Valid values are: ``basic``, ``improved``, ``full`` and ``promoted``. - * ``limited`` Implementation of complex division and multiplication using - algebraic formulas at source precision. Overflow and non-finites values - are not handled. - * ``smith`` Implementation of complex division using the Smith algorithm at + * ``basic`` Implementation of complex division and multiplication using + algebraic formulas at source precision. No special handling to avoid + overflow. NaN and infinite and values are not handled. 
+ * ``improved`` Implementation of complex division using the Smith algorithm at
   source precision. Smith's algorithm for complex division.
   See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962).
-   Overflow is handled.
+   This value offers improved handling for overflow in intermediate calculations,
+   but overflow may occur. NaN and infinite values are not handled in some
+   cases.
   * ``full`` Implementation of complex division and multiplication using a
     call to runtime library functions (generally the case, but the BE might
     sometimes replace the library call if it knows enough about the potential
-   range of the inputs). Overflow and non-finite values are handled.
-  * ``extend`` Implementation of complex division using algebraic formulas at
-   higher precision. Overflow is handled.
+   range of the inputs). Overflow and non-finite values are handled by the
+   library implementation.
+  * ``promoted`` Implementation of complex division using algebraic formulas at
+   higher precision. Overflow is handled. Non-finite values are handled in some
+   cases. If the target hardware does not have native support for a higher precision
+   data type, an implementation for the complex operation will be used to provide
+   improved guards against intermediate overflow, but overflow and underflow may
+   still occur in some cases. NaN and infinite values are not handled.
+   This is the default value.
 
 .. _floating-point-environment:
 
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index b8b96d0688829..9eaa59b2d4e7f 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -400,24 +400,33 @@ class LangOptionsBase {
   // division.
enum ComplexRangeKind {
     /// Implementation of complex division and multiplication using a call to
-    /// runtime library functions (generally the case, but the BE might
-    /// sometimes replace the library call if it knows enough about the
-    /// potential range of the inputs). Overflow and non-finite values are
-    /// handled.
+    /// runtime library functions (generally the case, but the BE might
+    /// sometimes replace the library call if it knows enough about the
+    /// potential range of the inputs). Overflow and non-finite values are
+    /// handled by the library implementation.
     CX_Full,
 
-    /// Implementation of complex division using the Smith algorithm at source
-    /// precision. Overflow is handled.
-    CX_Smith,
-
-    /// Implementation of complex division using algebraic formulas at higher
-    /// precision. Overflow is handled.
-    CX_Extend,
-
-    /// Implementation of complex division and multiplication using algebraic
-    /// formulas at source precision. Overflow and non-finites values are not
-    /// handled.
-    CX_Limited,
+    /// Implementation of complex division using the Smith algorithm at
+    /// source precision. Smith's algorithm for complex division.
+    /// See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962).
+    /// This value offers improved handling for overflow in intermediate
+    /// calculations, but overflow may occur. NaN and infinite values are not
+    /// handled in some cases.
+    CX_Improved,
+
+    /// Implementation of complex division using algebraic formulas at
+    /// higher precision. Overflow is handled. Non-finite values are handled in
+    /// some cases. If the target hardware does not have native support for a
+    /// higher precision data type, an implementation for the complex operation
+    /// will be used to provide improved guards against intermediate overflow,
+    /// but overflow and underflow may still occur in some cases. NaN and
+    /// infinite values are not handled. This is the default value. 
+ CX_Promoted,
+
+    /// Implementation of complex division and multiplication using
+    /// algebraic formulas at source precision. No special handling to avoid
+    /// overflow. NaN and infinite values are not handled.
+    CX_Basic,
 
     /// No range rule is enabled.
     CX_None
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 848cc38188d86..5b791c43e678a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1014,13 +1014,13 @@ defm offload_uniform_block : BoolFOption<"offload-uniform-block",
 
 def fcomplex_arithmetic_EQ : Joined<["-"], "fcomplex-arithmetic=">, Group,
   Visibility<[ClangOption, CC1Option]>,
-  Values<"full,smith,extend,limited">, NormalizedValuesScope<"LangOptions">,
-  NormalizedValues<["CX_Full", "CX_Smith", "CX_Extend", "CX_Limited"]>;
+  Values<"full,improved,promoted,basic">, NormalizedValuesScope<"LangOptions">,
+  NormalizedValues<["CX_Full", "CX_Improved", "CX_Promoted", "CX_Basic"]>;
 
 def complex_range_EQ : Joined<["-"], "complex-range=">, Group,
   Visibility<[CC1Option]>,
-  Values<"full,smith,extend,limited">, NormalizedValuesScope<"LangOptions">,
-  NormalizedValues<["CX_Full", "CX_Smith", "CX_Extend", "CX_Limited"]>,
+  Values<"full,improved,promoted,basic">, NormalizedValuesScope<"LangOptions">,
+  NormalizedValues<["CX_Full", "CX_Improved", "CX_Promoted", "CX_Basic"]>,
   MarshallingInfoEnum, "CX_Full">;
 
 // OpenCL-only Options
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp
index e01be6e5fdd6e..2008265f1f6fd 100644
--- a/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -283,23 +283,48 @@ class ComplexExprEmitter
   ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName,
                                         const BinOpInfo &Op);
 
+  QualType HigherPrecisionTypeForComplexArithmetic(QualType ElementType,
+                                                   bool IsDivOpCode) {
+    const TargetInfo &TI = CGF.getContext().getTargetInfo();
+    if (const auto *BT = dyn_cast(ElementType)) {
+      switch 
(BT->getKind()) { + case BuiltinType::Kind::Float16: + case BuiltinType::Kind::BFloat16: { + return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + } + case BuiltinType::Kind::Float: + return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); + case BuiltinType::Kind::Double: + if (TI.hasLongDoubleType()) { + return CGF.getContext().getComplexType(CGF.getContext().LongDoubleTy); + } else { + return QualType(); + } + case BuiltinType::Kind::LongDouble: + if (TI.getTriple().isOSLinux()) { + if (TI.hasFloat128Type() && !TI.hasLongDoubleType()) + return CGF.getContext().getComplexType(CGF.getContext().Float128Ty); + else + return CGF.getContext().getComplexType( + CGF.getContext().LongDoubleTy); + } + if (TI.getTriple().isOSWindows()) + return CGF.getContext().getComplexType(CGF.getContext().LongDoubleTy); + default: + return QualType(); + } + } + return QualType(); + } + QualType getPromotionType(QualType Ty, bool IsDivOpCode = false) { if (auto *CT = Ty->getAs()) { QualType ElementType = CT->getElementType(); - if (CGF.getLangOpts().getComplexRange() == - LangOptions::ComplexRangeKind::CX_Extend && - IsDivOpCode) { - if (ElementType->isFloatingType()) { - if (const auto *BT = dyn_cast(ElementType)) - switch (BT->getKind()) { - case BuiltinType::Kind::Float: - return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); - default: - return CGF.getContext().getComplexType( - CGF.getContext().LongDoubleTy); - } - } - } + if (IsDivOpCode && ElementType->isFloatingType() && + CGF.getLangOpts().getComplexRange() == + LangOptions::ComplexRangeKind::CX_Promoted) + return HigherPrecisionTypeForComplexArithmetic(ElementType, + IsDivOpCode); if (ElementType.UseExcessPrecision(CGF.getContext())) return CGF.getContext().getComplexType(CGF.getContext().FloatTy); } @@ -804,9 +829,9 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { ResR = Builder.CreateFSub(AC, BD, "mul_r"); ResI = Builder.CreateFAdd(AD, BC, "mul_i"); - if 
(Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited || - Op.FPFeatures.getComplexRange() == LangOptions::CX_Smith || - Op.FPFeatures.getComplexRange() == LangOptions::CX_Extend) + if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || + Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || + Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) return ComplexPairTy(ResR, ResI); // Emit the test for the real part becoming NaN and create a branch to @@ -997,10 +1022,10 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { llvm::Value *OrigLHSi = LHSi; if (!LHSi) LHSi = llvm::Constant::getNullValue(RHSi->getType()); - if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Smith) + if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved) return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi); - else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited || - Op.FPFeatures.getComplexRange() == LangOptions::CX_Extend) + else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || + Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) return EmitAlgebraicDiv(LHSr, LHSi, RHSr, RHSi); else if (!CGF.getLangOpts().FastMath || // '-ffast-math' is used in the command line but followed by an diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7f4cb46a44f4c..54bf4d48d08df 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2692,14 +2692,14 @@ static std::string ComplexRangeKindToStr(LangOptions::ComplexRangeKind Range) { case LangOptions::ComplexRangeKind::CX_Full: return "full"; break; - case LangOptions::ComplexRangeKind::CX_Limited: - return "limited"; + case LangOptions::ComplexRangeKind::CX_Basic: + return "basic"; break; - case LangOptions::ComplexRangeKind::CX_Smith: - return "smith"; + case LangOptions::ComplexRangeKind::CX_Improved: + return "improved"; break; - case 
LangOptions::ComplexRangeKind::CX_Extend: - return "extend"; + case LangOptions::ComplexRangeKind::CX_Promoted: + return "promoted"; break; default: return ""; @@ -2791,12 +2791,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, StringRef Val = A->getValue(); if (Val.equals("full")) RangeVal = LangOptions::ComplexRangeKind::CX_Full; - else if (Val.equals("smith")) - RangeVal = LangOptions::ComplexRangeKind::CX_Smith; - else if (Val.equals("extend")) - RangeVal = LangOptions::ComplexRangeKind::CX_Extend; - else if (Val.equals("limited")) - RangeVal = LangOptions::ComplexRangeKind::CX_Limited; + else if (Val.equals("improved")) + RangeVal = LangOptions::ComplexRangeKind::CX_Improved; + else if (Val.equals("promoted")) + RangeVal = LangOptions::ComplexRangeKind::CX_Promoted; + else if (Val.equals("basic")) + RangeVal = LangOptions::ComplexRangeKind::CX_Basic; else D.Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << LangOptions::ComplexRangeKind::CX_None; @@ -3073,7 +3073,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, SeenUnsafeMathModeOption = true; // ffast-math enables fortran rules for complex multiplication and // division. 
- Range = LangOptions::ComplexRangeKind::CX_Limited; + Range = LangOptions::ComplexRangeKind::CX_Basic; break; } case options::OPT_fno_fast_math: diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index dc938e4e4ced1..87894ef47f8cb 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -909,7 +909,7 @@ void Parser::HandlePragmaCXLimitedRange() { LangOptions::ComplexRangeKind Range; switch (OOS) { case tok::OOS_ON: - Range = LangOptions::CX_Limited; + Range = LangOptions::CX_Basic; break; case tok::OOS_OFF: Range = LangOptions::CX_Full; diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index f78fda61a9d1c..8b467402b2277 100644 --- a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -2,98 +2,98 @@ // RUN: -o - | FileCheck %s --check-prefix=FULL // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -complex-range=limited -o - | FileCheck %s --check-prefix=LMTD +// RUN: -complex-range=basic -o - | FileCheck %s --check-prefix=BASIC // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -complex-range=smith -o - | FileCheck %s --check-prefix=SMITH +// RUN: -complex-range=improved -o - | FileCheck %s --check-prefix=IMPRVD // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -complex-range=extend -o - | FileCheck %s --check-prefix=EXTND +// RUN: -complex-range=promoted -o - | FileCheck %s --check-prefix=PRMTD // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ // RUN: -complex-range=full -o - | FileCheck %s --check-prefix=FULL // Fast math // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ -// RUN: -ffast-math -complex-range=limited -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=LMTD +// RUN: -ffast-math -complex-range=basic -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=BASIC // RUN: %clang_cc1 -triple 
x86_64-unknown-linux-gnu \ // RUN: -ffast-math -complex-range=full -emit-llvm -o - %s \ // RUN: | FileCheck %s --check-prefix=FULL // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ -// RUN: -ffast-math -complex-range=smith -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=SMITH +// RUN: -ffast-math -complex-range=improved -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=IMPRVD // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ -// RUN: -ffast-math -complex-range=extend -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=EXTND +// RUN: -ffast-math -complex-range=promoted -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=PRMTD _Complex float div(_Complex float a, _Complex float b) { // LABEL: define {{.*}} @div( // FULL: call {{.*}} @__divsc3 // - // LMTD: fmul{{.*}}float - // LMTD-NEXT: fmul{{.*}}float - // LMTD-NEXT: fadd{{.*}}float - // LMTD-NEXT: fmul{{.*}}float - // LMTD-NEXT: fmul{{.*}}float - // LMTD-NEXT: fadd{{.*}}float - // LMTD-NEXT: fmul{{.*}}float - // LMTD-NEXT: fmul{{.*}}float - // LMTD-NEXT: fsub{{.*}}float - // LMTD-NEXT: fdiv{{.*}}float - // LMTD-NEXT: fdiv{{.*}}float + // BASIC: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fadd{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fadd{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fsub{{.*}}float + // BASIC-NEXT: fdiv{{.*}}float + // BASIC-NEXT: fdiv{{.*}}float // - // SMITH: call{{.*}}float @llvm.fabs.f32(float {{.*}}) - // SMITH-NEXT: call{{.*}}float @llvm.fabs.f32(float {{.*}}) - // SMITH-NEXT: fcmp{{.*}}ugt float {{.*}}, {{.*}} - // SMITH-NEXT: br i1 {{.*}}, label - // SMITH: abs_rhsr_greater_or_equal_abs_rhsi: - // SMITH-NEXT: fdiv{{.*}}float - // SMITH-NEXT: fmul{{.*}}float - // SMITH-NEXT: fadd{{.*}}float - // SMITH-NEXT: fmul{{.*}}float - // SMITH-NEXT: fadd{{.*}}float - // SMITH-NEXT: fdiv{{.*}}float - // SMITH-NEXT: fmul{{.*}}float - // 
SMITH-NEXT: fsub{{.*}}float - // SMITH-NEXT: fdiv{{.*}}float - // SMITH-NEXT: br label - // SMITH: abs_rhsr_less_than_abs_rhsi: - // SMITH-NEXT: fdiv{{.*}}float - // SMITH-NEXT: fmul{{.*}}float - // SMITH-NEXT: fadd{{.*}}float - // SMITH-NEXT: fmul{{.*}}float - // SMITH-NEXT: fadd{{.*}}float - // SMITH-NEXT: fdiv{{.*}}float - // SMITH-NEXT: fmul{{.*}}float - // SMITH-NEXT: fsub{{.*}}float - // SMITH-NEXT: fdiv{{.*}}float + // IMPRVD: call{{.*}}float @llvm.fabs.f32(float {{.*}}) + // IMPRVD-NEXT: call{{.*}}float @llvm.fabs.f32(float {{.*}}) + // IMPRVD-NEXT: fcmp{{.*}}ugt float {{.*}}, {{.*}} + // IMPRVD-NEXT: br i1 {{.*}}, label + // IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: + // IMPRVD-NEXT: fdiv{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fadd{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fadd{{.*}}float + // IMPRVD-NEXT: fdiv{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fsub{{.*}}float + // IMPRVD-NEXT: fdiv{{.*}}float + // IMPRVD-NEXT: br label + // IMPRVD: abs_rhsr_less_than_abs_rhsi: + // IMPRVD-NEXT: fdiv{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fadd{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fadd{{.*}}float + // IMPRVD-NEXT: fdiv{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fsub{{.*}}float + // IMPRVD-NEXT: fdiv{{.*}}float // - // EXTND: load float, ptr {{.*}} - // EXTND: fpext float {{.*}} to double - // EXTND-NEXT: fpext float {{.*}} to double - // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // EXTND-NEXT: load float, ptr {{.*}} - // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // EXTND-NEXT: load float, ptr {{.*}} - // EXTND-NEXT: fpext float {{.*}} to double - // EXTND-NEXT: fpext float {{.*}} to double - // EXTND-NEXT: fmul{{.*}}double - // EXTND-NEXT: fmul{{.*}}double - // EXTND-NEXT: fadd{{.*}}double - // EXTND-NEXT: fmul{{.*}}double - // EXTND-NEXT: fmul{{.*}}double 
- // EXTND-NEXT: fadd{{.*}}double - // EXTND-NEXT: fmul{{.*}}double - // EXTND-NEXT: fmul{{.*}}double - // EXTND-NEXT: fsub{{.*}}double - // EXTND-NEXT: fdiv{{.*}}double - // EXTND-NEXT: fdiv{{.*}}double - // EXTND-NEXT: fptrunc double {{.*}} to float - // EXTND-NEXT: fptrunc double {{.*}} to float + // PRMTD: load float, ptr {{.*}} + // PRMTD: fpext float {{.*}} to double + // PRMTD-NEXT: fpext float {{.*}} to double + // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load float, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load float, ptr {{.*}} + // PRMTD-NEXT: fpext float {{.*}} to double + // PRMTD-NEXT: fpext float {{.*}} to double + // PRMTD-NEXT: fmul{{.*}}double + // PRMTD-NEXT: fmul{{.*}}double + // PRMTD-NEXT: fadd{{.*}}double + // PRMTD-NEXT: fmul{{.*}}double + // PRMTD-NEXT: fmul{{.*}}double + // PRMTD-NEXT: fadd{{.*}}double + // PRMTD-NEXT: fmul{{.*}}double + // PRMTD-NEXT: fmul{{.*}}double + // PRMTD-NEXT: fsub{{.*}}double + // PRMTD-NEXT: fdiv{{.*}}double + // PRMTD-NEXT: fdiv{{.*}}double + // PRMTD-NEXT: fptrunc double {{.*}} to float + // PRMTD-NEXT: fptrunc double {{.*}} to float return a / b; } @@ -102,59 +102,59 @@ _Complex float mul(_Complex float a, _Complex float b) { // LABEL: define {{.*}} @mul( // FULL: call {{.*}} @__mulsc3 // - // LMTD: alloca { float, float } - // LMTD-NEXT: alloca { float, float } - // LMTD-NEXT: alloca { float, float } - // LMTD: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // LMTD-NEXT: load float, ptr {{.*}} - // LMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // LMTD-NEXT: load float, ptr {{.*}} - // LMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // LMTD-NEXT: load float, ptr {{.*}} - // LMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // LMTD-NEXT: load float - // LMTD-NEXT: 
fmul{{.*}}float - // LMTD-NEXT: fmul{{.*}}float - // LMTD-NEXT: fmul{{.*}}float - // LMTD-NEXT: fmul{{.*}}float - // LMTD-NEXT: fsub{{.*}}float - // LMTD-NEXT: fadd{{.*}}float + // BASIC: alloca { float, float } + // BASIC-NEXT: alloca { float, float } + // BASIC-NEXT: alloca { float, float } + // BASIC: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // BASIC-NEXT: load float, ptr {{.*}} + // BASIC-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // BASIC-NEXT: load float, ptr {{.*}} + // BASIC-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // BASIC-NEXT: load float, ptr {{.*}} + // BASIC-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // BASIC-NEXT: load float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fsub{{.*}}float + // BASIC-NEXT: fadd{{.*}}float // - // SMITH: alloca { float, float } - // SMITH-NEXT: alloca { float, float } - // SMITH-NEXT: alloca { float, float } - // SMITH: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // SMITH-NEXT: load float, ptr {{.*}} - // SMITH-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // SMITH-NEXT: load float, ptr {{.*}} - // SMITH-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // SMITH-NEXT: load float, ptr {{.*}} - // SMITH-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // SMITH-NEXT: load float - // SMITH-NEXT: fmul{{.*}}float - // SMITH-NEXT: fmul{{.*}}float - // SMITH-NEXT: fmul{{.*}}float - // SMITH-NEXT: fmul{{.*}}float - // SMITH-NEXT: fsub{{.*}}float - // SMITH-NEXT: fadd{{.*}}float + // IMPRVD: alloca { float, float } + // IMPRVD-NEXT: alloca { float, float } + // IMPRVD-NEXT: alloca { float, float } + // IMPRVD: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // IMPRVD-NEXT: load float, ptr 
{{.*}} + // IMPRVD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // IMPRVD-NEXT: load float, ptr {{.*}} + // IMPRVD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // IMPRVD-NEXT: load float, ptr {{.*}} + // IMPRVD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // IMPRVD-NEXT: load float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fsub{{.*}}float + // IMPRVD-NEXT: fadd{{.*}}float // - // EXTND: alloca { float, float } - // EXTND-NEXT: alloca { float, float } - // EXTND-NEXT: alloca { float, float } - // EXTND: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // EXTND-NEXT: load float, ptr - // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // EXTND-NEXT: load float, ptr {{.*}} - // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // EXTND-NEXT: load float, ptr {{.*}} - // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // EXTND-NEXT: load{{.*}}float - // EXTND-NEXT: fmul{{.*}}float - // EXTND-NEXT: fmul{{.*}}float - // EXTND-NEXT: fmul{{.*}}float - // EXTND-NEXT: fmul{{.*}}float - // EXTND-NEXT: fsub{{.*}}float - // EXTND-NEXT: fadd{{.*}}float + // PRMTD: alloca { float, float } + // PRMTD-NEXT: alloca { float, float } + // PRMTD-NEXT: alloca { float, float } + // PRMTD: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load float, ptr + // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load float, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load float, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // 
PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fsub{{.*}}float + // PRMTD-NEXT: fadd{{.*}}float return a * b; } diff --git a/clang/test/CodeGen/pragma-cx-limited-range.c b/clang/test/CodeGen/pragma-cx-limited-range.c index bb0576dd51b98..d47c71905887d 100644 --- a/clang/test/CodeGen/pragma-cx-limited-range.c +++ b/clang/test/CodeGen/pragma-cx-limited-range.c @@ -2,13 +2,13 @@ // RUN: -o - | FileCheck %s --check-prefix=FULL // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -complex-range=limited -o - | FileCheck --check-prefix=LMTD %s +// RUN: -complex-range=basic -o - | FileCheck --check-prefix=BASIC %s // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -complex-range=smith -o - | FileCheck --check-prefix=SMITH %s +// RUN: -complex-range=improved -o - | FileCheck --check-prefix=IMPRVD %s // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -complex-range=extend -o - | FileCheck --check-prefix=EXTND %s +// RUN: -complex-range=promoted -o - | FileCheck --check-prefix=PRMTD %s // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ // RUN: -complex-range=full -o - | FileCheck --check-prefix=FULL %s @@ -24,26 +24,26 @@ _Complex float pragma_on_mul(_Complex float a, _Complex float b) { // FULL-NEXT: fsub float // FULL-NEXT: fadd float - // LMTD: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fsub float - // LMTD-NEXT: fadd float - - // SMITH: fmul float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fsub float - // SMITH-NEXT: fadd float - - // EXTND: fmul float - // EXTND-NEXT: fmul float - // EXTND-NEXT: fmul float - // EXTND-NEXT: fmul float - // EXTND-NEXT: fsub float - // EXTND-NEXT: fadd float + // BASIC: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fmul 
float + // BASIC-NEXT: fsub float + // BASIC-NEXT: fadd float + + // IMPRVD: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fsub float + // IMPRVD-NEXT: fadd float + + // PRMTD: fmul float + // PRMTD-NEXT: fmul float + // PRMTD-NEXT: fmul float + // PRMTD-NEXT: fmul float + // PRMTD-NEXT: fsub float + // PRMTD-NEXT: fadd float return a * b; } @@ -54,11 +54,11 @@ _Complex float pragma_off_mul(_Complex float a, _Complex float b) { // FULL: call {{.*}} @__mulsc3 - // LMTD: call {{.*}} @__mulsc3 + // BASIC: call {{.*}} @__mulsc3 - // SMITH: call {{.*}} @__mulsc3 + // IMPRVD: call {{.*}} @__mulsc3 - // EXTND: call {{.*}} @__mulsc3 + // PRMTD: call {{.*}} @__mulsc3 return a * b; } @@ -79,45 +79,45 @@ _Complex float pragma_on_div(_Complex float a, _Complex float b) { // FULL-NEXT: fdiv float // FULL: fdiv float - // LMTD: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fadd float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fadd float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fsub float - // LMTD-NEXT: fdiv float - // LMTD-NEXT: fdiv float - - // SMITH: fmul float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fadd float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fadd float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fsub float - // SMITH-NEXT: fdiv float - // SMITH-NEXT: fdiv float - - // EXTND: fpext float {{.*}} to double - // EXTND: fpext float {{.*}} to double - // EXTND: fmul double - // EXTND: fmul double - // EXTND: fadd double - // EXTND: fmul double - // EXTND: fmul double - // EXTND: fadd double - // EXTND: fmul double - // EXTND: fmul double - // EXTND: fsub double - // EXTND: fdiv double - // EXTND: fdiv double - // EXTND: fptrunc double - // EXTND: fptrunc double + // BASIC: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fadd float + // BASIC-NEXT: fmul float + // 
BASIC-NEXT: fmul float + // BASIC-NEXT: fadd float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fsub float + // BASIC-NEXT: fdiv float + // BASIC-NEXT: fdiv float + + // IMPRVD: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fadd float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fadd float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fsub float + // IMPRVD-NEXT: fdiv float + // IMPRVD-NEXT: fdiv float + + // PRMTD: fpext float {{.*}} to double + // PRMTD: fpext float {{.*}} to double + // PRMTD: fmul double + // PRMTD: fmul double + // PRMTD: fadd double + // PRMTD: fmul double + // PRMTD: fmul double + // PRMTD: fadd double + // PRMTD: fmul double + // PRMTD: fmul double + // PRMTD: fsub double + // PRMTD: fdiv double + // PRMTD: fdiv double + // PRMTD: fptrunc double + // PRMTD: fptrunc double return a / b; } @@ -128,11 +128,11 @@ _Complex float pragma_off_div(_Complex float a, _Complex float b) { // FULL: call {{.*}} @__divsc3 - // LMTD: call {{.*}} @__divsc3 + // BASIC: call {{.*}} @__divsc3 - // SMITH: call {{.*}} @__divsc3 + // IMPRVD: call {{.*}} @__divsc3 - // EXTND: call {{.*}} @__divdc3 + // PRMTD: call {{.*}} @__divdc3 return a / b; } @@ -148,26 +148,26 @@ _Complex float pragma_default_mul(_Complex float a, _Complex float b) { // FULL-NEXT: fsub float // FULL-NEXT: fadd float - // LMTD: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fsub float - // LMTD-NEXT: fadd float - - // SMITH: fmul float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fsub float - // SMITH-NEXT: fadd float - - // EXTND: fmul float - // EXTND-NEXT: fmul float - // EXTND-NEXT: fmul float - // EXTND-NEXT: fmul float - // EXTND-NEXT: fsub float - // EXTND-NEXT: fadd float + // BASIC: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fmul 
float + // BASIC-NEXT: fsub float + // BASIC-NEXT: fadd float + + // IMPRVD: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fsub float + // IMPRVD-NEXT: fadd float + + // PRMTD: fmul float + // PRMTD-NEXT: fmul float + // PRMTD-NEXT: fmul float + // PRMTD-NEXT: fmul float + // PRMTD-NEXT: fsub float + // PRMTD-NEXT: fadd float return a * b; } @@ -177,66 +177,66 @@ _Complex float pragma_default_div(_Complex float a, _Complex float b) { // FULL: call {{.*}} @__divsc3 - // LMTD: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fadd float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fadd float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fsub float - // LMTD-NEXT: fdiv float - // LMTD-NEXT: fdiv float - - // SMITH: call{{.*}}float @llvm.fabs.f32(float {{.*}}) - // SMITH-NEXT: call{{.*}}float @llvm.fabs.f32(float {{.*}}) - // SMITH-NEXT: fcmp{{.*}}ugt float {{.*}}, {{.*}} - // SMITH-NEXT: br i1 {{.*}}, label - // SMITH: abs_rhsr_greater_or_equal_abs_rhsi: - // SMITH-NEXT: fdiv float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fadd float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fadd float - // SMITH-NEXT: fdiv float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fsub float - // SMITH-NEXT: fdiv float - // SMITH-NEXT: br label - // SMITH: abs_rhsr_less_than_abs_rhsi: - // SMITH-NEXT: fdiv float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fadd float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fadd float - // SMITH-NEXT: fdiv float - // SMITH-NEXT: fmul float - // SMITH-NEXT: fsub float - // SMITH-NEXT: fdiv float - - // EXTND: load float, ptr {{.*}} - // EXTND: fpext float {{.*}} to double - // EXTND-NEXT: fpext float {{.*}} to double - // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // EXTND-NEXT: load float, ptr {{.*}} - // EXTND-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // 
EXTND-NEXT: load float, ptr {{.*}} - // EXTND-NEXT: fpext float {{.*}} to double - // EXTND-NEXT: fpext float {{.*}} to double - // EXTND-NEXT: fmul double - // EXTND-NEXT: fmul double - // EXTND-NEXT: fadd double - // EXTND-NEXT: fmul double - // EXTND-NEXT: fmul double - // EXTND-NEXT: fadd double - // EXTND-NEXT: fmul double - // EXTND-NEXT: fmul double - // EXTND-NEXT: fsub double - // EXTND-NEXT: fdiv double - // EXTND-NEXT: fdiv double - // EXTND-NEXT: fptrunc double {{.*}} to float - // EXTND-NEXT: fptrunc double {{.*}} to float + // BASIC: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fadd float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fadd float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fsub float + // BASIC-NEXT: fdiv float + // BASIC-NEXT: fdiv float + + // IMPRVD: call{{.*}}float @llvm.fabs.f32(float {{.*}}) + // IMPRVD-NEXT: call{{.*}}float @llvm.fabs.f32(float {{.*}}) + // IMPRVD-NEXT: fcmp{{.*}}ugt float {{.*}}, {{.*}} + // IMPRVD-NEXT: br i1 {{.*}}, label + // IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: + // IMPRVD-NEXT: fdiv float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fadd float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fadd float + // IMPRVD-NEXT: fdiv float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fsub float + // IMPRVD-NEXT: fdiv float + // IMPRVD-NEXT: br label + // IMPRVD: abs_rhsr_less_than_abs_rhsi: + // IMPRVD-NEXT: fdiv float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fadd float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fadd float + // IMPRVD-NEXT: fdiv float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fsub float + // IMPRVD-NEXT: fdiv float + + // PRMTD: load float, ptr {{.*}} + // PRMTD: fpext float {{.*}} to double + // PRMTD-NEXT: fpext float {{.*}} to double + // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load float, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { float, 
float }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load float, ptr {{.*}} + // PRMTD-NEXT: fpext float {{.*}} to double + // PRMTD-NEXT: fpext float {{.*}} to double + // PRMTD-NEXT: fmul double + // PRMTD-NEXT: fmul double + // PRMTD-NEXT: fadd double + // PRMTD-NEXT: fmul double + // PRMTD-NEXT: fmul double + // PRMTD-NEXT: fadd double + // PRMTD-NEXT: fmul double + // PRMTD-NEXT: fmul double + // PRMTD-NEXT: fsub double + // PRMTD-NEXT: fdiv double + // PRMTD-NEXT: fdiv double + // PRMTD-NEXT: fptrunc double {{.*}} to float + // PRMTD-NEXT: fptrunc double {{.*}} to float return a / b; } diff --git a/clang/test/Driver/range.c b/clang/test/Driver/range.c index 434e9407664ce..b064fa9378ab9 100644 --- a/clang/test/Driver/range.c +++ b/clang/test/Driver/range.c @@ -1,93 +1,93 @@ // Test range options for complex multiplication and division. -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=limited -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=LMTD %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=BASIC %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=smith -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=SMITH %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=IMPRVD %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=extend -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=EXTND %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=PRMTD %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=FULL %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=limited \ -// RUN: -fcomplex-arithmetic=smith -c %s 2>&1 \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ +// RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN1 %s -// RUN: %clang 
-### -target x86_64 -fcomplex-arithmetic=limited \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ // RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN2 %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=limited \ -// RUN: -fcomplex-arithmetic=extend -c %s 2>&1 \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ +// RUN: -fcomplex-arithmetic=promoted -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN3 %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=smith \ -// RUN: -fcomplex-arithmetic=limited -c %s 2>&1 \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=improved \ +// RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN4 %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=smith \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=improved \ // RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN5 %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=smith \ -// RUN: -fcomplex-arithmetic=extend -c %s 2>&1 \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=improved \ +// RUN: -fcomplex-arithmetic=promoted -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN6 %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=extend \ -// RUN: -fcomplex-arithmetic=limited -c %s 2>&1 \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ +// RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN7 %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=extend \ -// RUN: -fcomplex-arithmetic=smith -c %s 2>&1 \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ +// RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN8 %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=extend \ +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ // RUN: -fcomplex-arithmetic=full -c %s 
2>&1 \ // RUN: | FileCheck --check-prefix=WARN9 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ -// RUN: -fcomplex-arithmetic=limited -c %s 2>&1 \ +// RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN10 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ -// RUN: -fcomplex-arithmetic=smith -c %s 2>&1 \ +// RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN11 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ -// RUN: -fcomplex-arithmetic=extend -c %s 2>&1 \ +// RUN: -fcomplex-arithmetic=promoted -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN12 %s // RUN: %clang -### -target x86_64 -ffast-math -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=LMTD %s +// RUN: | FileCheck --check-prefix=BASIC %s -// RUN: %clang -### -target x86_64 -ffast-math -fcomplex-arithmetic=limited -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=LMTD %s +// RUN: %clang -### -target x86_64 -ffast-math -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=BASIC %s -// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=limited -ffast-math -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=LMTD %s +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=BASIC %s -// LMTD: -complex-range=limited +// BASIC: -complex-range=basic // FULL: -complex-range=full -// EXTND: -complex-range=extend -// LMTD-NOT: -complex-range=smith -// CHECK-NOT: -complex-range=limited -// SMITH: -complex-range=smith -// SMITH-NOT: -complex-range=limited -// CHECK-NOT: -complex-range=smith -// WARN1: warning: overriding '-fcomplex-arithmetic=limited' option with '-fcomplex-arithmetic=smith' [-Woverriding-option] -// WARN2: warning: overriding '-fcomplex-arithmetic=limited' option with '-fcomplex-arithmetic=full' [-Woverriding-option] -// WARN3: warning: overriding '-fcomplex-arithmetic=limited' option with 
'-fcomplex-arithmetic=extend' [-Woverriding-option] -// WARN4: warning: overriding '-fcomplex-arithmetic=smith' option with '-fcomplex-arithmetic=limited' [-Woverriding-option] -// WARN5: warning: overriding '-fcomplex-arithmetic=smith' option with '-fcomplex-arithmetic=full' [-Woverriding-option] -// WARN6: warning: overriding '-fcomplex-arithmetic=smith' option with '-fcomplex-arithmetic=extend' [-Woverriding-option] -// WARN7: warning: overriding '-fcomplex-arithmetic=extend' option with '-fcomplex-arithmetic=limited' [-Woverriding-option] -// WARN8: warning: overriding '-fcomplex-arithmetic=extend' option with '-fcomplex-arithmetic=smith' [-Woverriding-option] -// WARN9: warning: overriding '-fcomplex-arithmetic=extend' option with '-fcomplex-arithmetic=full' [-Woverriding-option] -// WARN10: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=limited' [-Woverriding-option] -// WARN11: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=smith' [-Woverriding-option] -// WARN12: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=extend' [-Woverriding-option] +// PRMTD: -complex-range=promoted +// BASIC-NOT: -complex-range=improved +// CHECK-NOT: -complex-range=basic +// IMPRVD: -complex-range=improved +// IMPRVD-NOT: -complex-range=basic +// CHECK-NOT: -complex-range=improved +// WARN1: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] +// WARN2: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=full' [-Woverriding-option] +// WARN3: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] +// WARN4: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] +// WARN5: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=full' 
[-Woverriding-option] +// WARN6: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] +// WARN7: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] +// WARN8: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] +// WARN9: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=full' [-Woverriding-option] +// WARN10: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] +// WARN11: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] +// WARN12: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] From 4aa0925259fcc46f513c9b2922bf405b72089193 Mon Sep 17 00:00:00 2001 From: Ammarguellat Date: Wed, 21 Feb 2024 06:28:33 -0800 Subject: [PATCH 03/18] Fix LIT tests. 
--- clang/test/CodeGen/complex-math.c | 2 +- clang/test/CodeGen/smiths-complex-div.c | 104 ++++++++++++------------ 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/clang/test/CodeGen/complex-math.c b/clang/test/CodeGen/complex-math.c index a44aa0014a658..ba00b9cbecd2f 100644 --- a/clang/test/CodeGen/complex-math.c +++ b/clang/test/CodeGen/complex-math.c @@ -5,7 +5,7 @@ // RUN: %clang_cc1 %s -O0 -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s --check-prefix=ARM // RUN: %clang_cc1 %s -O0 -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s --check-prefix=ARMHF // RUN: %clang_cc1 %s -O0 -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s --check-prefix=ARM7K -// RUN: %clang_cc1 %s -O0 -emit-llvm -triple aarch64-unknown-unknown -ffast-math -ffp-contract=fast -complex-range=fortran -o - | FileCheck %s --check-prefix=AARCH64-FASTMATH +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple aarch64-unknown-unknown -ffast-math -ffp-contract=fast -complex-range=improved -o - | FileCheck %s --check-prefix=AARCH64-FASTMATH // RUN: %clang_cc1 %s -O0 -emit-llvm -triple spir -o - | FileCheck %s --check-prefix=SPIR float _Complex add_float_rr(float a, float b) { diff --git a/clang/test/CodeGen/smiths-complex-div.c b/clang/test/CodeGen/smiths-complex-div.c index 75775675c9238..5882f8b3545f9 100644 --- a/clang/test/CodeGen/smiths-complex-div.c +++ b/clang/test/CodeGen/smiths-complex-div.c @@ -1,58 +1,58 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -complex-range=fortran -o - | FileCheck %s --check-prefix=FRTRN +// RUN: -complex-range=improved -o - | FileCheck %s --check-prefix=IMPRVD -// FRTRN-LABEL: define dso_local <2 x float> @div( -// FRTRN-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// FRTRN-NEXT: entry: -// 
FRTRN-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 -// FRTRN-NEXT: [[A:%.*]] = alloca { float, float }, align 4 -// FRTRN-NEXT: [[B:%.*]] = alloca { float, float }, align 4 -// FRTRN-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 -// FRTRN-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 -// FRTRN-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 -// FRTRN-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 -// FRTRN-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 -// FRTRN-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 -// FRTRN-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 -// FRTRN-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 -// FRTRN-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 -// FRTRN-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 -// FRTRN-NEXT: [[TMP0:%.*]] = call float @llvm.fabs.f32(float [[B_REAL]]) -// FRTRN-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[B_IMAG]]) -// FRTRN-NEXT: [[ABS_CMP:%.*]] = fcmp ugt float [[TMP0]], [[TMP1]] -// FRTRN-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] -// FRTRN: abs_rhsr_greater_or_equal_abs_rhsi: -// FRTRN-NEXT: [[TMP2:%.*]] = fdiv float [[B_IMAG]], [[B_REAL]] -// FRTRN-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], [[B_IMAG]] -// FRTRN-NEXT: [[TMP4:%.*]] = fadd float [[B_REAL]], [[TMP3]] -// FRTRN-NEXT: [[TMP5:%.*]] = fmul float [[A_IMAG]], [[TMP2]] -// FRTRN-NEXT: [[TMP6:%.*]] = fadd float [[A_REAL]], [[TMP5]] -// FRTRN-NEXT: [[TMP7:%.*]] = fdiv float [[TMP6]], [[TMP4]] -// FRTRN-NEXT: [[TMP8:%.*]] = fmul float [[A_REAL]], [[TMP2]] -// FRTRN-NEXT: [[TMP9:%.*]] = fsub float [[A_IMAG]], [[TMP8]] -// FRTRN-NEXT: [[TMP10:%.*]] = fdiv float [[TMP9]], [[TMP4]] -// FRTRN-NEXT: br label [[COMPLEX_DIV:%.*]] 
-// FRTRN: abs_rhsr_less_than_abs_rhsi: -// FRTRN-NEXT: [[TMP11:%.*]] = fdiv float [[B_REAL]], [[B_IMAG]] -// FRTRN-NEXT: [[TMP12:%.*]] = fmul float [[TMP11]], [[B_REAL]] -// FRTRN-NEXT: [[TMP13:%.*]] = fadd float [[B_IMAG]], [[TMP12]] -// FRTRN-NEXT: [[TMP14:%.*]] = fmul float [[A_REAL]], [[TMP11]] -// FRTRN-NEXT: [[TMP15:%.*]] = fadd float [[TMP14]], [[A_IMAG]] -// FRTRN-NEXT: [[TMP16:%.*]] = fdiv float [[TMP15]], [[TMP13]] -// FRTRN-NEXT: [[TMP17:%.*]] = fmul float [[A_IMAG]], [[TMP11]] -// FRTRN-NEXT: [[TMP18:%.*]] = fsub float [[TMP17]], [[A_REAL]] -// FRTRN-NEXT: [[TMP19:%.*]] = fdiv float [[TMP18]], [[TMP13]] -// FRTRN-NEXT: br label [[COMPLEX_DIV]] -// FRTRN: complex_div: -// FRTRN-NEXT: [[TMP20:%.*]] = phi float [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] -// FRTRN-NEXT: [[TMP21:%.*]] = phi float [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] -// FRTRN-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 -// FRTRN-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 -// FRTRN-NEXT: store float [[TMP20]], ptr [[RETVAL_REALP]], align 4 -// FRTRN-NEXT: store float [[TMP21]], ptr [[RETVAL_IMAGP]], align 4 -// FRTRN-NEXT: [[TMP22:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 -// FRTRN-NEXT: ret <2 x float> [[TMP22]] +// IMPRVD-LABEL: define dso_local <2 x float> @div( +// IMPRVD-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// IMPRVD-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// IMPRVD-NEXT: [[A_REALP:%.*]] = 
getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// IMPRVD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// IMPRVD-NEXT: [[TMP0:%.*]] = call float @llvm.fabs.f32(float [[B_REAL]]) +// IMPRVD-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[B_IMAG]]) +// IMPRVD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt float [[TMP0]], [[TMP1]] +// IMPRVD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD-NEXT: [[TMP2:%.*]] = fdiv float [[B_IMAG]], [[B_REAL]] +// IMPRVD-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], [[B_IMAG]] +// IMPRVD-NEXT: [[TMP4:%.*]] = fadd float [[B_REAL]], [[TMP3]] +// IMPRVD-NEXT: [[TMP5:%.*]] = fmul float [[A_IMAG]], [[TMP2]] +// IMPRVD-NEXT: [[TMP6:%.*]] = fadd float [[A_REAL]], [[TMP5]] +// IMPRVD-NEXT: [[TMP7:%.*]] = fdiv float [[TMP6]], [[TMP4]] +// IMPRVD-NEXT: [[TMP8:%.*]] = fmul float [[A_REAL]], [[TMP2]] +// IMPRVD-NEXT: [[TMP9:%.*]] = fsub float [[A_IMAG]], [[TMP8]] +// IMPRVD-NEXT: [[TMP10:%.*]] = fdiv float [[TMP9]], [[TMP4]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD: abs_rhsr_less_than_abs_rhsi: +// IMPRVD-NEXT: [[TMP11:%.*]] = fdiv float [[B_REAL]], [[B_IMAG]] +// IMPRVD-NEXT: [[TMP12:%.*]] = fmul float [[TMP11]], [[B_REAL]] +// IMPRVD-NEXT: [[TMP13:%.*]] = fadd float [[B_IMAG]], [[TMP12]] +// IMPRVD-NEXT: [[TMP14:%.*]] = fmul float [[A_REAL]], [[TMP11]] +// IMPRVD-NEXT: 
[[TMP15:%.*]] = fadd float [[TMP14]], [[A_IMAG]]
+// IMPRVD-NEXT: [[TMP16:%.*]] = fdiv float [[TMP15]], [[TMP13]]
+// IMPRVD-NEXT: [[TMP17:%.*]] = fmul float [[A_IMAG]], [[TMP11]]
+// IMPRVD-NEXT: [[TMP18:%.*]] = fsub float [[TMP17]], [[A_REAL]]
+// IMPRVD-NEXT: [[TMP19:%.*]] = fdiv float [[TMP18]], [[TMP13]]
+// IMPRVD-NEXT: br label [[COMPLEX_DIV]]
+// IMPRVD: complex_div:
+// IMPRVD-NEXT: [[TMP20:%.*]] = phi float [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ]
+// IMPRVD-NEXT: [[TMP21:%.*]] = phi float [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ]
+// IMPRVD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0
+// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1
+// IMPRVD-NEXT: store float [[TMP20]], ptr [[RETVAL_REALP]], align 4
+// IMPRVD-NEXT: store float [[TMP21]], ptr [[RETVAL_IMAGP]], align 4
+// IMPRVD-NEXT: [[TMP22:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4
+// IMPRVD-NEXT: ret <2 x float> [[TMP22]]
//
_Complex float div(_Complex float a, _Complex float b) {
return a / b;

From 2ddba9a14831acb777d1ba461c5291d9744bfb92 Mon Sep 17 00:00:00 2001
From: Ammarguellat
Date: Thu, 22 Feb 2024 06:42:31 -0800
Subject: [PATCH 04/18] Addressed only a few issues from the reviewer's comments.
Updated the RN and added a warning to tell the user about the presence of nnan
and ninf with full implementation. Changed the behavior of multiplication: when
full implementation is required in the presence of nnan and ninf it will
implement basic implementation instead.
--- clang/docs/UsersManual.rst | 17 ++++++++------- .../clang/Basic/DiagnosticDriverKinds.td | 2 ++ clang/include/clang/Basic/LangOptions.h | 11 ++++------ clang/lib/CodeGen/CGExprComplex.cpp | 11 ++++++++-- clang/lib/Driver/ToolChains/Clang.cpp | 7 ++++++- clang/test/CodeGen/cx-complex-range.c | 21 ++++++++++++++++++- 6 files changed, 50 insertions(+), 19 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 732eaf4f2b14e..ca011ce05b152 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1856,20 +1856,21 @@ floating point semantic models: precise (the default), strict, and fast. * ``basic`` Implementation of complex division and multiplication using algebraic formulas at source precision. No special handling to avoid overflow. NaN and infinite and values are not handled. - * ``improved`` Implementation of complex division using the Smith algorithm at - source precision. Smith's algorithm for complex division. + * ``improved`` Implementation of complex division using the Smith algorithm + at source precision. Smith's algorithm for complex division. See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). - This value offers improved handling for overflow in intermediate calculations, - but overflow may occur. NaN and infinite and values are not handled in some - cases. - * ``full`` Implementation of complex division and multiplication using a + This value offers improved handling for overflow in intermediate + calculations, but overflow may occur. NaN and infinite and values are not + handled in some cases. + * ``full`` Implementation of complex division and multiplication using a call to runtime library functions (generally the case, but the BE might sometimes replace the library call if it knows enough about the potential range of the inputs). Overflow and non-finite values are handled by the - library implementation. + library implementation. 
For the case of multiplication overflow will occur in + accordance with normal floating-point rules. * ``promoted`` Implementation of complex division using algebraic formulas at higher precision. Overflow is handled. Non-finite values are handled in some - cases. If the target hardware does not have native support for a higher precision + cases. If the target does not have native support for a higher precision data type, an implementation for the complex operation will be used to provide improved guards against intermediate overflow, but overflow and underflow may still occur in some cases. NaN and infinite and values are not handled. diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index b13181f6e7089..b904a5f54238c 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -445,6 +445,8 @@ def warn_drv_no_floating_point_registers: Warning< InGroup; def warn_ignoring_ftabstop_value : Warning< "ignoring invalid -ftabstop value '%0', using default value %1">; +def warn_nnan_ninf_with_full_range_complex_arithmetic : Warning< + "use of nnan or ninf flags in the presence of '%0' option">; def warn_drv_overriding_option : Warning< "overriding '%0' option with '%1'">, InGroup>; diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 40e377f111b82..2de21c5f60316 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -406,12 +406,9 @@ class LangOptionsBase { /// handled by the library implementation. CX_Full, - /// Implementation of complex division using the Smith algorithm at - /// source precision. Smith's algorithm for complex division. - /// See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 - /// (1962). This value offers improved handling for overflow in intermediate - /// calculations, but overflow may occur. 
NaN and infinite and values are - /// not handled in some cases. + /// Implementation of complex division offering an improved handling + /// for overflow in intermediate calculations with no special handling for + /// NaN and infinite and values. CX_Improved, /// Implementation of complex division using algebraic formulas at @@ -424,7 +421,7 @@ class LangOptionsBase { CX_Promoted, /// Implementation of complex division and multiplication using - /// algebraic formulas at source precision.No special handling to avoid + /// algebraic formulas at source precision. No special handling to avoid /// overflow.NaN and infinite and values are not handled. CX_Basic, diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index bb577d9287121..9111488f50bcf 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -832,7 +832,8 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || - Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) + Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted || + CGF.getLangOpts().NoHonorInfs || CGF.getLangOpts().NoHonorNaNs) return ComplexPairTy(ResR, ResI); // Emit the test for the real part becoming NaN and create a branch to @@ -1023,7 +1024,13 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { llvm::Value *OrigLHSi = LHSi; if (!LHSi) LHSi = llvm::Constant::getNullValue(RHSi->getType()); - if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved) + const TargetInfo &TI = CGF.getContext().getTargetInfo(); + QualType ComplexElementTy = + Op.Ty->castAs()->getElementType(); + const BuiltinType *BT = ComplexElementTy->getAs(); + if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || + (TI.getTriple().isOSLinux() && + BT->getKind() == BuiltinType::Kind::LongDouble)) return 
EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi); else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 444adb4da35d9..c03000347d8d6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2789,8 +2789,13 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, FPExceptionBehavior = ""; // If fast-math is set then set the fp-contract mode to fast. FPContract = "fast"; - // ffast-math enables limited range rules for complex multiplication and + // ffast-math enables basic range rules for complex multiplication and // division. + // Warn if user expects to perform full implementation of complex + // multiplication or division in the presence of nnan or ninf flags. + if (Range == LangOptions::ComplexRangeKind::CX_Full) + D.Diag(clang::diag::warn_nnan_ninf_with_full_range_complex_arithmetic) + << ComplexArithmeticStr(Range); Range = LangOptions::ComplexRangeKind::CX_Basic; SeenUnsafeMathModeOption = true; }; diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index 8b467402b2277..57cb84cb793ad 100644 --- a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -20,7 +20,7 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ // RUN: -ffast-math -complex-range=full -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=FULL +// RUN: | FileCheck %s --check-prefix=FULL_FAST // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ // RUN: -ffast-math -complex-range=improved -emit-llvm -o - %s \ @@ -33,6 +33,7 @@ _Complex float div(_Complex float a, _Complex float b) { // LABEL: define {{.*}} @div( // FULL: call {{.*}} @__divsc3 + // FULL_FAST: call {{.*}} @__divsc3 // // BASIC: fmul{{.*}}float // BASIC-NEXT: fmul{{.*}}float @@ -102,6 +103,24 @@ _Complex float 
mul(_Complex float a, _Complex float b) { // LABEL: define {{.*}} @mul( // FULL: call {{.*}} @__mulsc3 // + // FULL_FAST: alloca { float, float } + // FULL_FAST-NEXT: alloca { float, float } + // FULL_FAST-NEXT: alloca { float, float } + // FULL_FAST: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // FULL_FAST-NEXT: load float, ptr {{.*}} + // FULL_FAST-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // FULL_FAST-NEXT: load float, ptr {{.*}} + // FULL_FAST-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 + // FULL_FAST-NEXT: load float, ptr {{.*}} + // FULL_FAST-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 + // FULL_FAST-NEXT: load float + // FULL_FAST-NEXT: fmul{{.*}}float + // FULL_FAST-NEXT: fmul{{.*}}float + // FULL_FAST-NEXT: fmul{{.*}}float + // FULL_FAST-NEXT: fmul{{.*}}float + // FULL_FAST-NEXT: fsub{{.*}}float + // FULL_FAST-NEXT: fadd{{.*}}float + // BASIC: alloca { float, float } // BASIC-NEXT: alloca { float, float } // BASIC-NEXT: alloca { float, float } From e62c4620f99daceedc0a0cb53c9cedad5f26dbbf Mon Sep 17 00:00:00 2001 From: Ammarguellat Date: Mon, 26 Feb 2024 07:27:08 -0800 Subject: [PATCH 05/18] Fixed warnings. --- clang/docs/UsersManual.rst | 17 +++ .../clang/Basic/DiagnosticDriverKinds.td | 2 - clang/include/clang/Driver/Options.td | 18 +++ clang/lib/CodeGen/CGExprComplex.cpp | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 129 ++++++++++++++++-- clang/lib/Parse/ParsePragma.cpp | 5 + clang/test/CodeGen/cx-complex-range.c | 9 ++ clang/test/CodeGen/pragma-cx-limited-range.c | 3 + clang/test/Driver/range.c | 127 +++++++++++++---- 9 files changed, 276 insertions(+), 36 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index ca011ce05b152..91a601ddd73d4 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1876,6 +1876,23 @@ floating point semantic models: precise (the default), strict, and fast. 
still occur in some cases. NaN and infinite and values are not handled. This is the default value. +.. option:: -fcx-limited-range: + + This option is aliased to ``-fcomplex-arithmetic=basic``. It enables the + naive mathematical formulas for complex division and multiplication with no + NaN checking of results. The default is ``-fno-cx-limited-range`` aliased to + ``-fcomplex-arithmetic=full``. This option is enabled by the ``-ffast-math`` + option. + +.. option:: -fcx-fortran-rules: + + This option is aliased to ``-fcomplex-arithmetic=improved``. It enables the + naive mathematical formulas for complex multiplication and enables application + of Smith's algorithm for complex division. See SMITH, R. L. Algorithm 116: + Complex division. Commun. ACM 5, 8 (1962). + The default is ``-fno-cx-fortran-rules`` aliased to + ``-fcomplex-arithmetic=full``. + .. _floating-point-environment: Accessing the floating point environment diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index b904a5f54238c..b13181f6e7089 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -445,8 +445,6 @@ def warn_drv_no_floating_point_registers: Warning< InGroup; def warn_ignoring_ftabstop_value : Warning< "ignoring invalid -ftabstop value '%0', using default value %1">; -def warn_nnan_ninf_with_full_range_complex_arithmetic : Warning< - "use of nnan or ninf flags in the presence of '%0' option">; def warn_drv_overriding_option : Warning< "overriding '%0' option with '%1'">, InGroup>; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3757317a84f8f..06da037d957c6 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1050,6 +1050,24 @@ def complex_range_EQ : Joined<["-"], "complex-range=">, Group, NormalizedValues<["CX_Full", "CX_Improved", "CX_Promoted", "CX_Basic"]>, 
MarshallingInfoEnum, "CX_Full">; +def fcx_limited_range : Flag<["-"], "fcx-limited-range">, + Group, Visibility<[ClangOption, CC1Option]>, + HelpText<"Basic algebraic expansions of complex arithmetic operations " + "involving are enabled.">; + +def fno_cx_limited_range : Flag<["-"], "fno-cx-limited-range">, + Group, Visibility<[ClangOption, CC1Option]>, + HelpText<"Basic algebraic expansions of complex arithmetic operations " + "involving are disabled.">; + +def fcx_fortran_rules : Flag<["-"], "fcx-fortran-rules">, + Group, Visibility<[ClangOption, CC1Option]>, + HelpText<"Range reduction is enabled for complex arithmetic operations.">; + +def fno_cx_fortran_rules : Flag<["-"], "fno-cx-fortran-rules">, + Group, Visibility<[ClangOption, CC1Option]>, + HelpText<"Range reduction is disabled for complex arithmetic operations.">; + // OpenCL-only Options def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group, Visibility<[ClangOption, CC1Option]>, diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 9111488f50bcf..5532b876c1779 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -1037,7 +1037,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { return EmitAlgebraicDiv(LHSr, LHSi, RHSr, RHSi); else if (!CGF.getLangOpts().FastMath || // '-ffast-math' is used in the command line but followed by an - // '-fno-cx-limited-range'. + // '-fno-cx-limited-range' or '-fcomplex-arithmetic=full'. 
Op.FPFeatures.getComplexRange() == LangOptions::CX_Full) { LHSi = OrigLHSi; // If we have a complex operand on the RHS and FastMath is not allowed, we diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c03000347d8d6..fa26937a253c8 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2707,15 +2707,34 @@ static std::string ComplexRangeKindToStr(LangOptions::ComplexRangeKind Range) { } static std::string ComplexArithmeticStr(LangOptions::ComplexRangeKind Range) { - return "-fcomplex-arithmetic=" + ComplexRangeKindToStr(Range); + return (Range == LangOptions::ComplexRangeKind::CX_None) + ? "" + : "-fcomplex-arithmetic=" + ComplexRangeKindToStr(Range); } static void EmitComplexRangeDiag(const Driver &D, LangOptions::ComplexRangeKind Range1, - LangOptions::ComplexRangeKind Range2) { - if (Range1 != Range2 && Range1 != LangOptions::ComplexRangeKind::CX_None) - D.Diag(clang::diag::warn_drv_overriding_option) - << ComplexArithmeticStr(Range1) << ComplexArithmeticStr(Range2); + LangOptions::ComplexRangeKind Range2, + std::string GccRangeComplexOption) { + if (Range1 != Range2 && Range1 != LangOptions::ComplexRangeKind::CX_None) { + if (!GccRangeComplexOption.empty()) + if (Range1 == LangOptions::ComplexRangeKind::CX_Basic) + D.Diag(clang::diag::warn_drv_overriding_option) + << GccRangeComplexOption << ComplexArithmeticStr(Range2); + else if (Range2 == LangOptions::ComplexRangeKind::CX_Basic) + D.Diag(clang::diag::warn_drv_overriding_option) + << ComplexArithmeticStr(Range1) << GccRangeComplexOption; + else + D.Diag(clang::diag::warn_drv_overriding_option) + << ComplexArithmeticStr(Range1) << ComplexArithmeticStr(Range2); + } +} + +static void EmitComplexRangeDiagZA(const Driver &D, std::string str1, + std::string str2) { + if ((str1.compare(str2) != 0) && !str2.empty() && !str1.empty()) { + D.Diag(clang::diag::warn_drv_overriding_option) << str1 << str2; + } } static std::string @@ 
-2774,6 +2793,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, StringRef BFloat16ExcessPrecision = ""; LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None; std::string ComplexRangeStr = ""; + std::string GccRangeComplexOption = ""; // Lambda to set fast-math options. This is also used by -ffp-model=fast auto applyFastMath = [&]() { @@ -2794,8 +2814,14 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // Warn if user expects to perform full implementation of complex // multiplication or division in the presence of nnan or ninf flags. if (Range == LangOptions::ComplexRangeKind::CX_Full) - D.Diag(clang::diag::warn_nnan_ninf_with_full_range_complex_arithmetic) - << ComplexArithmeticStr(Range); + EmitComplexRangeDiagZA( + D, ComplexArithmeticStr(Range), + !GccRangeComplexOption.empty() + ? GccRangeComplexOption + : ComplexArithmeticStr(LangOptions::ComplexRangeKind::CX_Basic)); + //D.Diag(clang::diag::warn_drv_overriding_option) + // << ComplexArithmeticStr(Range) + // << ComplexArithmeticStr(LangOptions::ComplexRangeKind::CX_Basic); Range = LangOptions::ComplexRangeKind::CX_Basic; SeenUnsafeMathModeOption = true; }; @@ -2811,6 +2837,64 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, switch (optID) { default: break; + case options::OPT_fcx_limited_range: + if (!GccRangeComplexOption.empty()) { + EmitComplexRangeDiagZA(D, GccRangeComplexOption, "-fcx-limited-range"); + } else { + if (Range != LangOptions::ComplexRangeKind::CX_Basic) + EmitComplexRangeDiagZA(D, RenderComplexRangeOption(Range), + "-fcx-limited-range"); + } + GccRangeComplexOption = "-fcx-limited-range"; + //EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Basic, + // GccRangeComplexOption); + //EmitComplexRangeDiagZA(D, ComplexRangeKindToStr(Range), + // GccRangeComplexOption); + Range = LangOptions::ComplexRangeKind::CX_Basic; + break; + case 
options::OPT_fno_cx_limited_range: + //EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full, + // GccRangeComplexOption); + //EmitComplexRangeDiagZA(D, ComplexRangeKindToStr(Range), + // GccRangeComplexOption); + if (!GccRangeComplexOption.empty() && + (GccRangeComplexOption.compare("-fcx-limited-range") != 0 && + GccRangeComplexOption.compare("-fno-cx-fortran-rules") != 0)) + EmitComplexRangeDiagZA(D, GccRangeComplexOption, "-fno-cx-limited-range"); + if (GccRangeComplexOption.empty()) + EmitComplexRangeDiagZA(D, RenderComplexRangeOption(Range), + "-fno-cx-limited-range"); + GccRangeComplexOption = "-fno-cx-limited-range"; + Range = LangOptions::ComplexRangeKind::CX_Full; + break; + case options::OPT_fcx_fortran_rules: + if (!GccRangeComplexOption.empty()) + EmitComplexRangeDiagZA(D, GccRangeComplexOption, "-fcx-fortran-rules"); + else + EmitComplexRangeDiagZA(D, RenderComplexRangeOption(Range), + "-fcx-fortran-rules"); + GccRangeComplexOption = "-fcx-fortran-rules"; + + //EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Improved, + // GccRangeComplexOption); + Range = LangOptions::ComplexRangeKind::CX_Improved; + break; + case options::OPT_fno_cx_fortran_rules: + if (!GccRangeComplexOption.empty() && + GccRangeComplexOption.compare("-fno-cx-limited-range") == 0) + GccRangeComplexOption = "-fno-cx-fortran-rules"; + if (!GccRangeComplexOption.empty()) + EmitComplexRangeDiagZA(D, GccRangeComplexOption, "-fno-cx-fortran-rules"); + else + EmitComplexRangeDiagZA(D, RenderComplexRangeOption(Range), + "-fno-cx-fortran-rules"); + GccRangeComplexOption = "-fno-cx-fortran-rules"; + //EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full, + // GccRangeComplexOption); + //EmitComplexRangeDiagZA(D, ComplexRangeKindToStr(Range), + // GccRangeComplexOption); + Range = LangOptions::ComplexRangeKind::CX_Full; + break; case options::OPT_fcomplex_arithmetic_EQ: { LangOptions::ComplexRangeKind RangeVal; StringRef Val = A->getValue(); 
@@ -2825,7 +2909,28 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, else D.Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << LangOptions::ComplexRangeKind::CX_None; - EmitComplexRangeDiag(D, Range, RangeVal); + //EmitComplexRangeDiag(D, Range, RangeVal, GccRangeComplexOption); + bool p1 = GccRangeComplexOption.compare("-fcx-limited-range"); + if (GccRangeComplexOption.empty() && !SeenUnsafeMathModeOption) { + EmitComplexRangeDiagZA(D, ComplexArithmeticStr(Range), + ComplexArithmeticStr(RangeVal)); + } + if (!GccRangeComplexOption.empty()) { + if (GccRangeComplexOption.compare("-fcx-limited-range") != 0) { + if (GccRangeComplexOption.compare("-fcx-fortran-rules") != 0) { + if (RangeVal != LangOptions::ComplexRangeKind::CX_Improved) + EmitComplexRangeDiagZA(D, GccRangeComplexOption, + ComplexArithmeticStr(RangeVal)); + } else { + EmitComplexRangeDiagZA(D, GccRangeComplexOption, + ComplexArithmeticStr(RangeVal)); + } + } else { + if (RangeVal != LangOptions::ComplexRangeKind::CX_Basic) + EmitComplexRangeDiagZA(D, GccRangeComplexOption, + ComplexArithmeticStr(RangeVal)); + } + } Range = RangeVal; break; } @@ -3247,6 +3352,14 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, CmdArgs.push_back(Args.MakeArgString("-fcomplex-arithmetic=" + ComplexRangeKindToStr(Range))); } + if (Args.hasArg(options::OPT_fcx_limited_range)) + CmdArgs.push_back("-fcx-limited-range"); + if (Args.hasArg(options::OPT_fcx_fortran_rules)) + CmdArgs.push_back("-fcx-fortran-rules"); + if (Args.hasArg(options::OPT_fno_cx_limited_range)) + CmdArgs.push_back("-fno-cx-limited-range"); + if (Args.hasArg(options::OPT_fno_cx_fortran_rules)) + CmdArgs.push_back("-fno-cx-fortran-rules"); } static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs, diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index 87894ef47f8cb..0f692e2146a49 100644 --- a/clang/lib/Parse/ParsePragma.cpp 
+++ b/clang/lib/Parse/ParsePragma.cpp @@ -844,6 +844,11 @@ void Parser::HandlePragmaFPContract() { FPC = LangOptions::FPM_Off; break; case tok::OOS_DEFAULT: + // According to ISO C99 standard chapter 7.3.4, the default value + // for the pragma is ``off'. '-fcomplex-arithmetic=basic', + // '-fcx-limited-range', '-fcx-fortran-rules' and + // '-fcomplex-arithmetic=improved' control the default value of these + // pragmas. FPC = getLangOpts().getDefaultFPContractMode(); break; } diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index 57cb84cb793ad..f84aa35224102 100644 --- a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -4,6 +4,12 @@ // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ // RUN: -complex-range=basic -o - | FileCheck %s --check-prefix=BASIC +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -fno-cx-limited-range -o - | FileCheck %s --check-prefix=FULL + +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -complex-range=improved -o - | FileCheck %s --check-prefix=IMPRVD + // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ // RUN: -complex-range=improved -o - | FileCheck %s --check-prefix=IMPRVD @@ -22,6 +28,9 @@ // RUN: -ffast-math -complex-range=full -emit-llvm -o - %s \ // RUN: | FileCheck %s --check-prefix=FULL_FAST +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -fno-cx-fortran-rules -o - | FileCheck %s --check-prefix=FULL + // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ // RUN: -ffast-math -complex-range=improved -emit-llvm -o - %s \ // RUN: | FileCheck %s --check-prefix=IMPRVD diff --git a/clang/test/CodeGen/pragma-cx-limited-range.c b/clang/test/CodeGen/pragma-cx-limited-range.c index d47c71905887d..68615348c1871 100644 --- a/clang/test/CodeGen/pragma-cx-limited-range.c +++ b/clang/test/CodeGen/pragma-cx-limited-range.c @@ -4,6 +4,9 @@ // 
RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ // RUN: -complex-range=basic -o - | FileCheck --check-prefix=BASIC %s +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -fno-cx-limited-range -o - | FileCheck %s --check-prefix=FULL + // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ // RUN: -complex-range=improved -o - | FileCheck --check-prefix=IMPRVD %s diff --git a/clang/test/Driver/range.c b/clang/test/Driver/range.c index b064fa9378ab9..ca089f6b79069 100644 --- a/clang/test/Driver/range.c +++ b/clang/test/Driver/range.c @@ -1,5 +1,38 @@ // Test range options for complex multiplication and division. +// RUN: %clang -### -target x86_64 -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=BASIC %s + +// RUN: %clang -### -target x86_64 -fno-cx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FULL %s + +// RUN: %clang -### -target x86_64 -fcx-limited-range -fcx-fortran-rules \ +// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN1 %s + +// RUN: %clang -### -target x86_64 -fno-cx-limited-range -fcx-fortran-rules \ +// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN2 %s + +// RUN: %clang -### -target x86_64 -fcx-limited-range -fno-cx-limited-range \ +// RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s + +// RUN: %clang -### -target x86_64 -fno-cx-limited-range -fcx-limited-range \ +// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN3 %s + +// RUN: %clang -### -target x86_64 -fno-cx-limited-range -fno-cx-fortran-rules \ +// RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s + +// RUN: %clang -### -target x86_64 -fno-cx-fortran-rules -fno-cx-limited-range \ +// RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s + +// RUN: %clang -### -target x86_64 -fcx-limited-range -fno-cx-fortran-rules \ +// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN4 %s + +// RUN: %clang -### -target x86_64 -fcx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=IMPRVD %s + +// RUN: 
%clang -### -target x86_64 -fno-cx-fortran-rules -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FULL %s + // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=BASIC %s @@ -13,64 +46,100 @@ // RUN: | FileCheck --check-prefix=FULL %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ +// RUN: -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=BASIC %s + +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ +// RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN5 %s + +// RUN: %clang -### -target x86_64 -fcx-limited-range \ // RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN1 %s +// RUN: | FileCheck --check-prefix=WARN6 %s + +// RUN: %clang -### -target x86_64 -fcx-fortran-rules \ +// RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN7 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ // RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN2 %s +// RUN: | FileCheck --check-prefix=WARN8 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ // RUN: -fcomplex-arithmetic=promoted -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN3 %s +// RUN: | FileCheck --check-prefix=WARN9 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=improved \ // RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN4 %s +// RUN: | FileCheck --check-prefix=WARN10 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=improved \ // RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN5 %s +// RUN: | FileCheck --check-prefix=WARN11 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=improved \ // RUN: -fcomplex-arithmetic=promoted -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN6 %s +// RUN: | FileCheck --check-prefix=WARN12 %s // RUN: %clang 
-### -target x86_64 -fcomplex-arithmetic=promoted \ // RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN7 %s +// RUN: | FileCheck --check-prefix=WARN13 %s + +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ +// RUN: -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN14 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ // RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN8 %s +// RUN: | FileCheck --check-prefix=WARN15 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ // RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN9 %s - +// RUN: | FileCheck --check-prefix=WARN16 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ // RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN10 %s +// RUN: | FileCheck --check-prefix=WARN17 %s + +// RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ +// RUN: -ffast-math -c %s 2>&1 | FileCheck --check-prefix=WARN17 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ // RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN11 %s +// RUN: | FileCheck --check-prefix=WARN18 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ // RUN: -fcomplex-arithmetic=promoted -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN12 %s +// RUN: | FileCheck --check-prefix=WARN19 %s // RUN: %clang -### -target x86_64 -ffast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=BASIC %s +// RUN: %clang -### -target x86_64 -ffast-math -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=BASIC %s + +// RUN: %clang -### -target x86_64 -fcx-limited-range -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=BASIC %s + +// RUN: %clang -### -target x86_64 -ffast-math -fno-cx-limited-range -c %s \ +// RUN: 2>&1 | FileCheck --check-prefix=FULL %s 
+ // RUN: %clang -### -target x86_64 -ffast-math -fcomplex-arithmetic=basic -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=BASIC %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic -ffast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=BASIC %s +// RUN: %clang -### -Werror -target x86_64 -fcx-limited-range -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=BASIC %s + +// RUN: %clang -### -target x86_64 -ffast-math -fcomplex-arithmetic=full -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FULL %s + +// RUN: %clang -### -target x86_64 -ffast-math -fcomplex-arithmetic=basic -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=BASIC %s + // BASIC: -complex-range=basic // FULL: -complex-range=full // PRMTD: -complex-range=promoted @@ -79,15 +148,23 @@ // IMPRVD: -complex-range=improved // IMPRVD-NOT: -complex-range=basic // CHECK-NOT: -complex-range=improved -// WARN1: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] -// WARN2: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=full' [-Woverriding-option] -// WARN3: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] -// WARN4: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] -// WARN5: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=full' [-Woverriding-option] -// WARN6: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] -// WARN7: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] -// WARN8: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] -// WARN9: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=full' 
[-Woverriding-option] -// WARN10: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] -// WARN11: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] -// WARN12: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] + +// WARN1: warning: overriding '-fcx-limited-range' option with '-fcx-fortran-rules' [-Woverriding-option] +// WARN2: warning: overriding '-fno-cx-limited-range' option with '-fcx-fortran-rules' [-Woverriding-option] +// WARN3: overriding '-fno-cx-limited-range' option with '-fcx-limited-range' [-Woverriding-option] +// WARN4: warning: overriding '-fcx-limited-range' option with '-fno-cx-fortran-rules' [-Woverriding-option] +// WARN5: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] +// WARN6: warning: overriding '-fcx-limited-range' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] +// WARN7: warning: overriding '-fcx-fortran-rules' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] +// WARN8: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=full' [-Woverriding-option] +// WARN9: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] +// WARN10: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] +// WARN11: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=full' [-Woverriding-option] +// WARN12: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] +// WARN13: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] +// WARN14: overriding '-complex-range=promoted' option 
with '-fcx-limited-range' [-Woverriding-option] +// WARN15: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] +// WARN16: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=full' [-Woverriding-option] +// WARN17: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] +// WARN18: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] +// WARN19: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] From 1d61aa6ef9f3eb080bd78e117492ee48136c5c50 Mon Sep 17 00:00:00 2001 From: Ammarguellat Date: Mon, 26 Feb 2024 07:43:59 -0800 Subject: [PATCH 06/18] Fix format. --- clang/lib/CodeGen/CGExprComplex.cpp | 3 +- clang/lib/Driver/ToolChains/Clang.cpp | 84 ++++++++------------------- 2 files changed, 24 insertions(+), 63 deletions(-) diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 5532b876c1779..77f416ccaf8ff 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -1025,8 +1025,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { if (!LHSi) LHSi = llvm::Constant::getNullValue(RHSi->getType()); const TargetInfo &TI = CGF.getContext().getTargetInfo(); - QualType ComplexElementTy = - Op.Ty->castAs()->getElementType(); + QualType ComplexElementTy = Op.Ty->castAs()->getElementType(); const BuiltinType *BT = ComplexElementTy->getAs(); if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || (TI.getTriple().isOSLinux() && diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index fa26937a253c8..1d67a9000fda0 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2712,25 +2712,7 @@ static std::string 
ComplexArithmeticStr(LangOptions::ComplexRangeKind Range) { : "-fcomplex-arithmetic=" + ComplexRangeKindToStr(Range); } -static void EmitComplexRangeDiag(const Driver &D, - LangOptions::ComplexRangeKind Range1, - LangOptions::ComplexRangeKind Range2, - std::string GccRangeComplexOption) { - if (Range1 != Range2 && Range1 != LangOptions::ComplexRangeKind::CX_None) { - if (!GccRangeComplexOption.empty()) - if (Range1 == LangOptions::ComplexRangeKind::CX_Basic) - D.Diag(clang::diag::warn_drv_overriding_option) - << GccRangeComplexOption << ComplexArithmeticStr(Range2); - else if (Range2 == LangOptions::ComplexRangeKind::CX_Basic) - D.Diag(clang::diag::warn_drv_overriding_option) - << ComplexArithmeticStr(Range1) << GccRangeComplexOption; - else - D.Diag(clang::diag::warn_drv_overriding_option) - << ComplexArithmeticStr(Range1) << ComplexArithmeticStr(Range2); - } -} - -static void EmitComplexRangeDiagZA(const Driver &D, std::string str1, +static void EmitComplexRangeDiag(const Driver &D, std::string str1, std::string str2) { if ((str1.compare(str2) != 0) && !str2.empty() && !str1.empty()) { D.Diag(clang::diag::warn_drv_overriding_option) << str1 << str2; @@ -2814,14 +2796,11 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // Warn if user expects to perform full implementation of complex // multiplication or division in the presence of nnan or ninf flags. if (Range == LangOptions::ComplexRangeKind::CX_Full) - EmitComplexRangeDiagZA( + EmitComplexRangeDiag( D, ComplexArithmeticStr(Range), !GccRangeComplexOption.empty() ? 
GccRangeComplexOption : ComplexArithmeticStr(LangOptions::ComplexRangeKind::CX_Basic)); - //D.Diag(clang::diag::warn_drv_overriding_option) - // << ComplexArithmeticStr(Range) - // << ComplexArithmeticStr(LangOptions::ComplexRangeKind::CX_Basic); Range = LangOptions::ComplexRangeKind::CX_Basic; SeenUnsafeMathModeOption = true; }; @@ -2839,44 +2818,33 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, break; case options::OPT_fcx_limited_range: if (!GccRangeComplexOption.empty()) { - EmitComplexRangeDiagZA(D, GccRangeComplexOption, "-fcx-limited-range"); + EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-limited-range"); } else { if (Range != LangOptions::ComplexRangeKind::CX_Basic) - EmitComplexRangeDiagZA(D, RenderComplexRangeOption(Range), - "-fcx-limited-range"); + EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fcx-limited-range"); } GccRangeComplexOption = "-fcx-limited-range"; - //EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Basic, - // GccRangeComplexOption); - //EmitComplexRangeDiagZA(D, ComplexRangeKindToStr(Range), - // GccRangeComplexOption); Range = LangOptions::ComplexRangeKind::CX_Basic; break; case options::OPT_fno_cx_limited_range: - //EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full, - // GccRangeComplexOption); - //EmitComplexRangeDiagZA(D, ComplexRangeKindToStr(Range), - // GccRangeComplexOption); if (!GccRangeComplexOption.empty() && (GccRangeComplexOption.compare("-fcx-limited-range") != 0 && GccRangeComplexOption.compare("-fno-cx-fortran-rules") != 0)) - EmitComplexRangeDiagZA(D, GccRangeComplexOption, "-fno-cx-limited-range"); + EmitComplexRangeDiag(D, GccRangeComplexOption, "-fno-cx-limited-range"); if (GccRangeComplexOption.empty()) - EmitComplexRangeDiagZA(D, RenderComplexRangeOption(Range), - "-fno-cx-limited-range"); + EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fno-cx-limited-range"); GccRangeComplexOption = "-fno-cx-limited-range"; 
Range = LangOptions::ComplexRangeKind::CX_Full; break; case options::OPT_fcx_fortran_rules: if (!GccRangeComplexOption.empty()) - EmitComplexRangeDiagZA(D, GccRangeComplexOption, "-fcx-fortran-rules"); + EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-fortran-rules"); else - EmitComplexRangeDiagZA(D, RenderComplexRangeOption(Range), - "-fcx-fortran-rules"); + EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fcx-fortran-rules"); GccRangeComplexOption = "-fcx-fortran-rules"; - - //EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Improved, - // GccRangeComplexOption); Range = LangOptions::ComplexRangeKind::CX_Improved; break; case options::OPT_fno_cx_fortran_rules: @@ -2884,15 +2852,11 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, GccRangeComplexOption.compare("-fno-cx-limited-range") == 0) GccRangeComplexOption = "-fno-cx-fortran-rules"; if (!GccRangeComplexOption.empty()) - EmitComplexRangeDiagZA(D, GccRangeComplexOption, "-fno-cx-fortran-rules"); + EmitComplexRangeDiag(D, GccRangeComplexOption, "-fno-cx-fortran-rules"); else - EmitComplexRangeDiagZA(D, RenderComplexRangeOption(Range), - "-fno-cx-fortran-rules"); + EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fno-cx-fortran-rules"); GccRangeComplexOption = "-fno-cx-fortran-rules"; - //EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full, - // GccRangeComplexOption); - //EmitComplexRangeDiagZA(D, ComplexRangeKindToStr(Range), - // GccRangeComplexOption); Range = LangOptions::ComplexRangeKind::CX_Full; break; case options::OPT_fcomplex_arithmetic_EQ: { @@ -2909,28 +2873,26 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, else D.Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << LangOptions::ComplexRangeKind::CX_None; - //EmitComplexRangeDiag(D, Range, RangeVal, GccRangeComplexOption); - bool p1 = GccRangeComplexOption.compare("-fcx-limited-range"); if 
(GccRangeComplexOption.empty() && !SeenUnsafeMathModeOption) { - EmitComplexRangeDiagZA(D, ComplexArithmeticStr(Range), - ComplexArithmeticStr(RangeVal)); + EmitComplexRangeDiag(D, ComplexArithmeticStr(Range), + ComplexArithmeticStr(RangeVal)); } if (!GccRangeComplexOption.empty()) { if (GccRangeComplexOption.compare("-fcx-limited-range") != 0) { if (GccRangeComplexOption.compare("-fcx-fortran-rules") != 0) { if (RangeVal != LangOptions::ComplexRangeKind::CX_Improved) - EmitComplexRangeDiagZA(D, GccRangeComplexOption, - ComplexArithmeticStr(RangeVal)); - } else { - EmitComplexRangeDiagZA(D, GccRangeComplexOption, + EmitComplexRangeDiag(D, GccRangeComplexOption, ComplexArithmeticStr(RangeVal)); + } else { + EmitComplexRangeDiag(D, GccRangeComplexOption, + ComplexArithmeticStr(RangeVal)); } } else { if (RangeVal != LangOptions::ComplexRangeKind::CX_Basic) - EmitComplexRangeDiagZA(D, GccRangeComplexOption, - ComplexArithmeticStr(RangeVal)); + EmitComplexRangeDiag(D, GccRangeComplexOption, + ComplexArithmeticStr(RangeVal)); } - } + } Range = RangeVal; break; } From 5aa271136c9e78b3f250f45fd8f5464e3a108d1f Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Mon, 26 Feb 2024 10:17:46 -0800 Subject: [PATCH 07/18] Fix format. 
--- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 19914399a097c..c8176efb3e82a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2713,7 +2713,7 @@ static std::string ComplexArithmeticStr(LangOptions::ComplexRangeKind Range) { } static void EmitComplexRangeDiag(const Driver &D, std::string str1, - std::string str2) { + std::string str2) { if ((str1.compare(str2) != 0) && !str2.empty() && !str1.empty()) { D.Diag(clang::diag::warn_drv_overriding_option) << str1 << str2; } From ba9a8da81084ccc3a20b428960ba8a3162afb6d8 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Tue, 27 Feb 2024 11:16:08 -0800 Subject: [PATCH 08/18] Fixed LIT test nofpclass.c and fixed the type promotion. --- clang/lib/CodeGen/CGExprComplex.cpp | 38 +++---- clang/test/CodeGen/nofpclass.c | 164 +++++++--------------------- 2 files changed, 55 insertions(+), 147 deletions(-) diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 77f416ccaf8ff..bb09a551fdfc9 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -286,30 +286,29 @@ class ComplexExprEmitter QualType HigherPrecisionTypeForComplexArithmetic(QualType ElementType, bool IsDivOpCode) { const TargetInfo &TI = CGF.getContext().getTargetInfo(); + const LangOptions Opts = CGF.getLangOpts(); if (const auto *BT = dyn_cast(ElementType)) { switch (BT->getKind()) { - case BuiltinType::Kind::Float16: + case BuiltinType::Kind::Float16: { + if (TI.hasFloat16Type() && !TI.hasLegalHalfType()) + return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + break; + } case BuiltinType::Kind::BFloat16: { - return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + if (TI.hasBFloat16Type() && !TI.hasFullBFloat16Type()) + return 
CGF.getContext().getComplexType(CGF.getContext().FloatTy); + break; } case BuiltinType::Kind::Float: return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); - case BuiltinType::Kind::Double: - if (TI.hasLongDoubleType()) { - return CGF.getContext().getComplexType(CGF.getContext().LongDoubleTy); - } else { - return QualType(); - } - case BuiltinType::Kind::LongDouble: - if (TI.getTriple().isOSLinux()) { - if (TI.hasFloat128Type() && !TI.hasLongDoubleType()) - return CGF.getContext().getComplexType(CGF.getContext().Float128Ty); - else - return CGF.getContext().getComplexType( - CGF.getContext().LongDoubleTy); - } - if (TI.getTriple().isOSWindows()) + break; + case BuiltinType::Kind::Double: { + if (TI.hasLongDoubleType()) return CGF.getContext().getComplexType(CGF.getContext().LongDoubleTy); + else + return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); + break; + } default: return QualType(); } @@ -1024,12 +1023,11 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { llvm::Value *OrigLHSi = LHSi; if (!LHSi) LHSi = llvm::Constant::getNullValue(RHSi->getType()); - const TargetInfo &TI = CGF.getContext().getTargetInfo(); QualType ComplexElementTy = Op.Ty->castAs()->getElementType(); const BuiltinType *BT = ComplexElementTy->getAs(); if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || - (TI.getTriple().isOSLinux() && - BT->getKind() == BuiltinType::Kind::LongDouble)) + (Op.FPFeatures.getComplexRange() == + LangOptions::CX_Promoted && BT->getKind() == BuiltinType::Kind::LongDouble)) return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi); else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c index dd90d02f7759b..9c5617924feac 100644 --- a/clang/test/CodeGen/nofpclass.c +++ b/clang/test/CodeGen/nofpclass.c @@ -542,35 +542,22 @@ _Complex float 
defined_complex_func(_Complex float a, _Complex double b, _Comple // CFINITEONLY-NEXT: [[MUL_BC:%.*]] = fmul nnan ninf double [[C_IMAG]], [[C_REAL2]] // CFINITEONLY-NEXT: [[MUL_R:%.*]] = fsub nnan ninf double [[MUL_AC]], [[MUL_BD]] // CFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf double [[MUL_AD]], [[MUL_BC]] -// CFINITEONLY-NEXT: [[ISNAN_CMP:%.*]] = fcmp nnan ninf uno double [[MUL_R]], [[MUL_R]] -// CFINITEONLY-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] -// CFINITEONLY: complex_mul_imag_nan: -// CFINITEONLY-NEXT: [[ISNAN_CMP5:%.*]] = fcmp nnan ninf uno double [[MUL_I]], [[MUL_I]] -// CFINITEONLY-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] -// CFINITEONLY: complex_mul_libcall: -// CFINITEONLY-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(nan inf) [[C_REAL]], double noundef nofpclass(nan inf) [[C_IMAG]], double noundef nofpclass(nan inf) [[C_REAL2]], double noundef nofpclass(nan inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] -// CFINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0 -// CFINITEONLY-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1 -// CFINITEONLY-NEXT: br label [[COMPLEX_MUL_CONT]] -// CFINITEONLY: complex_mul_cont: -// CFINITEONLY-NEXT: [[REAL_MUL_PHI:%.*]] = phi nnan ninf double [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP2]], [[COMPLEX_MUL_LIBCALL]] ] -// CFINITEONLY-NEXT: [[IMAG_MUL_PHI:%.*]] = phi nnan ninf double [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP3]], [[COMPLEX_MUL_LIBCALL]] ] // CFINITEONLY-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 // CFINITEONLY-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 -// CFINITEONLY-NEXT: store double [[REAL_MUL_PHI]], ptr 
[[RETVAL_REALP]], align 8 -// CFINITEONLY-NEXT: store double [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 8 -// CFINITEONLY-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 -// CFINITEONLY-NEXT: ret { double, double } [[TMP4]] +// CFINITEONLY-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 +// CFINITEONLY-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 +// CFINITEONLY-NEXT: [[TMP2:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// CFINITEONLY-NEXT: ret { double, double } [[TMP2]] // // CLFINITEONLY: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) // CLFINITEONLY-LABEL: define dso_local { double, double } @defined_complex_func_f64_ret // CLFINITEONLY-SAME: (double noundef nofpclass(nan inf) [[C_COERCE0:%.*]], double noundef nofpclass(nan inf) [[C_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CLFINITEONLY-NEXT: entry: -// CLFINITEONLY-NEXT: [[MUL_AD:%.*]] = fmul nnan ninf double [[C_COERCE0]], [[C_COERCE1]] -// CLFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf double [[MUL_AD]], [[MUL_AD]] // CLFINITEONLY-NEXT: [[MUL_AC:%.*]] = fmul nnan ninf double [[C_COERCE0]], [[C_COERCE0]] // CLFINITEONLY-NEXT: [[MUL_BD:%.*]] = fmul nnan ninf double [[C_COERCE1]], [[C_COERCE1]] +// CLFINITEONLY-NEXT: [[MUL_AD:%.*]] = fmul nnan ninf double [[C_COERCE0]], [[C_COERCE1]] // CLFINITEONLY-NEXT: [[MUL_R:%.*]] = fsub nnan ninf double [[MUL_AC]], [[MUL_BD]] +// CLFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf double [[MUL_AD]], [[MUL_AD]] // CLFINITEONLY-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { double, double } poison, double [[MUL_R]], 0 // CLFINITEONLY-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { double, double } [[DOTFCA_0_INSERT]], double [[MUL_I]], 1 // CLFINITEONLY-NEXT: ret { double, double } [[DOTFCA_1_INSERT]] @@ -599,25 +586,12 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple // NONANS-NEXT: [[MUL_BC:%.*]] = fmul nnan double [[C_IMAG]], 
[[C_REAL2]] // NONANS-NEXT: [[MUL_R:%.*]] = fsub nnan double [[MUL_AC]], [[MUL_BD]] // NONANS-NEXT: [[MUL_I:%.*]] = fadd nnan double [[MUL_AD]], [[MUL_BC]] -// NONANS-NEXT: [[ISNAN_CMP:%.*]] = fcmp nnan uno double [[MUL_R]], [[MUL_R]] -// NONANS-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] -// NONANS: complex_mul_imag_nan: -// NONANS-NEXT: [[ISNAN_CMP5:%.*]] = fcmp nnan uno double [[MUL_I]], [[MUL_I]] -// NONANS-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] -// NONANS: complex_mul_libcall: -// NONANS-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(nan) [[C_REAL]], double noundef nofpclass(nan) [[C_IMAG]], double noundef nofpclass(nan) [[C_REAL2]], double noundef nofpclass(nan) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] -// NONANS-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0 -// NONANS-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1 -// NONANS-NEXT: br label [[COMPLEX_MUL_CONT]] -// NONANS: complex_mul_cont: -// NONANS-NEXT: [[REAL_MUL_PHI:%.*]] = phi nnan double [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP2]], [[COMPLEX_MUL_LIBCALL]] ] -// NONANS-NEXT: [[IMAG_MUL_PHI:%.*]] = phi nnan double [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP3]], [[COMPLEX_MUL_LIBCALL]] ] // NONANS-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 // NONANS-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 -// NONANS-NEXT: store double [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 8 -// NONANS-NEXT: store double [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 8 -// NONANS-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 -// NONANS-NEXT: ret { double, double } [[TMP4]] +// NONANS-NEXT: store double [[MUL_R]], ptr 
[[RETVAL_REALP]], align 8 +// NONANS-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 +// NONANS-NEXT: [[TMP2:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// NONANS-NEXT: ret { double, double } [[TMP2]] // // NOINFS: Function Attrs: noinline nounwind optnone // NOINFS-LABEL: define dso_local { double, double } @defined_complex_func_f64_ret @@ -643,25 +617,12 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple // NOINFS-NEXT: [[MUL_BC:%.*]] = fmul ninf double [[C_IMAG]], [[C_REAL2]] // NOINFS-NEXT: [[MUL_R:%.*]] = fsub ninf double [[MUL_AC]], [[MUL_BD]] // NOINFS-NEXT: [[MUL_I:%.*]] = fadd ninf double [[MUL_AD]], [[MUL_BC]] -// NOINFS-NEXT: [[ISNAN_CMP:%.*]] = fcmp ninf uno double [[MUL_R]], [[MUL_R]] -// NOINFS-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] -// NOINFS: complex_mul_imag_nan: -// NOINFS-NEXT: [[ISNAN_CMP5:%.*]] = fcmp ninf uno double [[MUL_I]], [[MUL_I]] -// NOINFS-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] -// NOINFS: complex_mul_libcall: -// NOINFS-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(inf) [[C_REAL]], double noundef nofpclass(inf) [[C_IMAG]], double noundef nofpclass(inf) [[C_REAL2]], double noundef nofpclass(inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] -// NOINFS-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0 -// NOINFS-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1 -// NOINFS-NEXT: br label [[COMPLEX_MUL_CONT]] -// NOINFS: complex_mul_cont: -// NOINFS-NEXT: [[REAL_MUL_PHI:%.*]] = phi ninf double [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP2]], [[COMPLEX_MUL_LIBCALL]] ] -// NOINFS-NEXT: [[IMAG_MUL_PHI:%.*]] = phi ninf double [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP3]], [[COMPLEX_MUL_LIBCALL]] ] // NOINFS-NEXT: 
[[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 // NOINFS-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 -// NOINFS-NEXT: store double [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 8 -// NOINFS-NEXT: store double [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 8 -// NOINFS-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 -// NOINFS-NEXT: ret { double, double } [[TMP4]] +// NOINFS-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 +// NOINFS-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 +// NOINFS-NEXT: [[TMP2:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// NOINFS-NEXT: ret { double, double } [[TMP2]] // _Complex double defined_complex_func_f64_ret(_Complex double c) { return c * c; @@ -669,11 +630,10 @@ _Complex double defined_complex_func_f64_ret(_Complex double c) { // CFINITEONLY: Function Attrs: noinline nounwind optnone // CFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <2 x half> @defined_complex_func_f16_ret -// CFINITEONLY-SAME: (<2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR4]] { +// CFINITEONLY-SAME: (<2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR5:[0-9]+]] { // CFINITEONLY-NEXT: entry: // CFINITEONLY-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 // CFINITEONLY-NEXT: [[C:%.*]] = alloca { half, half }, align 2 -// CFINITEONLY-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 // CFINITEONLY-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 // CFINITEONLY-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 // CFINITEONLY-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 @@ -693,28 +653,12 @@ _Complex double defined_complex_func_f64_ret(_Complex double c) { // CFINITEONLY-NEXT: [[MUL_BC:%.*]] = fmul nnan ninf float [[EXT1]], [[EXT6]] // CFINITEONLY-NEXT: [[MUL_R:%.*]] = fsub nnan ninf float [[MUL_AC]], 
[[MUL_BD]] // CFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf float [[MUL_AD]], [[MUL_BC]] -// CFINITEONLY-NEXT: [[ISNAN_CMP:%.*]] = fcmp nnan ninf uno float [[MUL_R]], [[MUL_R]] -// CFINITEONLY-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2]] -// CFINITEONLY: complex_mul_imag_nan: -// CFINITEONLY-NEXT: [[ISNAN_CMP8:%.*]] = fcmp nnan ninf uno float [[MUL_I]], [[MUL_I]] -// CFINITEONLY-NEXT: br i1 [[ISNAN_CMP8]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] -// CFINITEONLY: complex_mul_libcall: -// CFINITEONLY-NEXT: [[CALL:%.*]] = call nnan ninf nofpclass(nan inf) <2 x float> @__mulsc3(float noundef nofpclass(nan inf) [[EXT]], float noundef nofpclass(nan inf) [[EXT1]], float noundef nofpclass(nan inf) [[EXT6]], float noundef nofpclass(nan inf) [[EXT7]]) #[[ATTR7]] -// CFINITEONLY-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 -// CFINITEONLY-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 -// CFINITEONLY-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 -// CFINITEONLY-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 -// CFINITEONLY-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 -// CFINITEONLY-NEXT: br label [[COMPLEX_MUL_CONT]] -// CFINITEONLY: complex_mul_cont: -// CFINITEONLY-NEXT: [[REAL_MUL_PHI:%.*]] = phi nnan ninf float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] -// CFINITEONLY-NEXT: [[IMAG_MUL_PHI:%.*]] = phi nnan ninf float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] -// CFINITEONLY-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half -// CFINITEONLY-NEXT: [[UNPROMOTION9:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half +// CFINITEONLY-NEXT: 
[[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half +// CFINITEONLY-NEXT: [[UNPROMOTION8:%.*]] = fptrunc float [[MUL_I]] to half // CFINITEONLY-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 // CFINITEONLY-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 // CFINITEONLY-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 -// CFINITEONLY-NEXT: store half [[UNPROMOTION9]], ptr [[RETVAL_IMAGP]], align 2 +// CFINITEONLY-NEXT: store half [[UNPROMOTION8]], ptr [[RETVAL_IMAGP]], align 2 // CFINITEONLY-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 // CFINITEONLY-NEXT: ret <2 x half> [[TMP0]] // @@ -723,27 +667,26 @@ _Complex double defined_complex_func_f64_ret(_Complex double c) { // CLFINITEONLY-SAME: (<2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { // CLFINITEONLY-NEXT: entry: // CLFINITEONLY-NEXT: [[C_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x half> [[C_COERCE]], i64 0 -// CLFINITEONLY-NEXT: [[EXT:%.*]] = fpext half [[C_SROA_0_0_VEC_EXTRACT]] to float // CLFINITEONLY-NEXT: [[C_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <2 x half> [[C_COERCE]], i64 1 +// CLFINITEONLY-NEXT: [[EXT:%.*]] = fpext half [[C_SROA_0_0_VEC_EXTRACT]] to float // CLFINITEONLY-NEXT: [[EXT1:%.*]] = fpext half [[C_SROA_0_2_VEC_EXTRACT]] to float -// CLFINITEONLY-NEXT: [[MUL_AD:%.*]] = fmul nnan ninf float [[EXT]], [[EXT1]] -// CLFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf float [[MUL_AD]], [[MUL_AD]] // CLFINITEONLY-NEXT: [[MUL_AC:%.*]] = fmul nnan ninf float [[EXT]], [[EXT]] // CLFINITEONLY-NEXT: [[MUL_BD:%.*]] = fmul nnan ninf float [[EXT1]], [[EXT1]] +// CLFINITEONLY-NEXT: [[MUL_AD:%.*]] = fmul nnan ninf float [[EXT]], [[EXT1]] // CLFINITEONLY-NEXT: [[MUL_R:%.*]] = fsub nnan ninf float [[MUL_AC]], [[MUL_BD]] +// CLFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf float [[MUL_AD]], [[MUL_AD]] // CLFINITEONLY-NEXT: 
[[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half -// CLFINITEONLY-NEXT: [[UNPROMOTION9:%.*]] = fptrunc float [[MUL_I]] to half +// CLFINITEONLY-NEXT: [[UNPROMOTION8:%.*]] = fptrunc float [[MUL_I]] to half // CLFINITEONLY-NEXT: [[RETVAL_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x half> poison, half [[UNPROMOTION]], i64 0 -// CLFINITEONLY-NEXT: [[RETVAL_SROA_0_2_VEC_INSERT:%.*]] = insertelement <2 x half> [[RETVAL_SROA_0_0_VEC_INSERT]], half [[UNPROMOTION9]], i64 1 +// CLFINITEONLY-NEXT: [[RETVAL_SROA_0_2_VEC_INSERT:%.*]] = insertelement <2 x half> [[RETVAL_SROA_0_0_VEC_INSERT]], half [[UNPROMOTION8]], i64 1 // CLFINITEONLY-NEXT: ret <2 x half> [[RETVAL_SROA_0_2_VEC_INSERT]] // // NONANS: Function Attrs: noinline nounwind optnone // NONANS-LABEL: define dso_local nofpclass(nan) <2 x half> @defined_complex_func_f16_ret -// NONANS-SAME: (<2 x half> noundef nofpclass(nan) [[C_COERCE:%.*]]) #[[ATTR4]] { +// NONANS-SAME: (<2 x half> noundef nofpclass(nan) [[C_COERCE:%.*]]) #[[ATTR5:[0-9]+]] { // NONANS-NEXT: entry: // NONANS-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 // NONANS-NEXT: [[C:%.*]] = alloca { half, half }, align 2 -// NONANS-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 // NONANS-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 // NONANS-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 // NONANS-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 @@ -763,38 +706,21 @@ _Complex double defined_complex_func_f64_ret(_Complex double c) { // NONANS-NEXT: [[MUL_BC:%.*]] = fmul nnan float [[EXT1]], [[EXT6]] // NONANS-NEXT: [[MUL_R:%.*]] = fsub nnan float [[MUL_AC]], [[MUL_BD]] // NONANS-NEXT: [[MUL_I:%.*]] = fadd nnan float [[MUL_AD]], [[MUL_BC]] -// NONANS-NEXT: [[ISNAN_CMP:%.*]] = fcmp nnan uno float [[MUL_R]], [[MUL_R]] -// NONANS-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2]] -// NONANS: complex_mul_imag_nan: -// 
NONANS-NEXT: [[ISNAN_CMP8:%.*]] = fcmp nnan uno float [[MUL_I]], [[MUL_I]] -// NONANS-NEXT: br i1 [[ISNAN_CMP8]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] -// NONANS: complex_mul_libcall: -// NONANS-NEXT: [[CALL:%.*]] = call nnan nofpclass(nan) <2 x float> @__mulsc3(float noundef nofpclass(nan) [[EXT]], float noundef nofpclass(nan) [[EXT1]], float noundef nofpclass(nan) [[EXT6]], float noundef nofpclass(nan) [[EXT7]]) #[[ATTR7]] -// NONANS-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 -// NONANS-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 -// NONANS-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 -// NONANS-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 -// NONANS-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 -// NONANS-NEXT: br label [[COMPLEX_MUL_CONT]] -// NONANS: complex_mul_cont: -// NONANS-NEXT: [[REAL_MUL_PHI:%.*]] = phi nnan float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] -// NONANS-NEXT: [[IMAG_MUL_PHI:%.*]] = phi nnan float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] -// NONANS-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half -// NONANS-NEXT: [[UNPROMOTION9:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half +// NONANS-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half +// NONANS-NEXT: [[UNPROMOTION8:%.*]] = fptrunc float [[MUL_I]] to half // NONANS-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 // NONANS-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 // NONANS-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 -// NONANS-NEXT: store half [[UNPROMOTION9]], ptr [[RETVAL_IMAGP]], align 
2 +// NONANS-NEXT: store half [[UNPROMOTION8]], ptr [[RETVAL_IMAGP]], align 2 // NONANS-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 // NONANS-NEXT: ret <2 x half> [[TMP0]] // // NOINFS: Function Attrs: noinline nounwind optnone // NOINFS-LABEL: define dso_local nofpclass(inf) <2 x half> @defined_complex_func_f16_ret -// NOINFS-SAME: (<2 x half> noundef nofpclass(inf) [[C_COERCE:%.*]]) #[[ATTR4]] { +// NOINFS-SAME: (<2 x half> noundef nofpclass(inf) [[C_COERCE:%.*]]) #[[ATTR5:[0-9]+]] { // NOINFS-NEXT: entry: // NOINFS-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 // NOINFS-NEXT: [[C:%.*]] = alloca { half, half }, align 2 -// NOINFS-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 // NOINFS-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 // NOINFS-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 // NOINFS-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 @@ -814,28 +740,12 @@ _Complex double defined_complex_func_f64_ret(_Complex double c) { // NOINFS-NEXT: [[MUL_BC:%.*]] = fmul ninf float [[EXT1]], [[EXT6]] // NOINFS-NEXT: [[MUL_R:%.*]] = fsub ninf float [[MUL_AC]], [[MUL_BD]] // NOINFS-NEXT: [[MUL_I:%.*]] = fadd ninf float [[MUL_AD]], [[MUL_BC]] -// NOINFS-NEXT: [[ISNAN_CMP:%.*]] = fcmp ninf uno float [[MUL_R]], [[MUL_R]] -// NOINFS-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2]] -// NOINFS: complex_mul_imag_nan: -// NOINFS-NEXT: [[ISNAN_CMP8:%.*]] = fcmp ninf uno float [[MUL_I]], [[MUL_I]] -// NOINFS-NEXT: br i1 [[ISNAN_CMP8]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] -// NOINFS: complex_mul_libcall: -// NOINFS-NEXT: [[CALL:%.*]] = call ninf nofpclass(inf) <2 x float> @__mulsc3(float noundef nofpclass(inf) [[EXT]], float noundef nofpclass(inf) [[EXT1]], float noundef nofpclass(inf) [[EXT6]], float noundef nofpclass(inf) [[EXT7]]) #[[ATTR7]] -// NOINFS-NEXT: store <2 x float> 
[[CALL]], ptr [[COERCE]], align 4 -// NOINFS-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 -// NOINFS-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 -// NOINFS-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 -// NOINFS-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 -// NOINFS-NEXT: br label [[COMPLEX_MUL_CONT]] -// NOINFS: complex_mul_cont: -// NOINFS-NEXT: [[REAL_MUL_PHI:%.*]] = phi ninf float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] -// NOINFS-NEXT: [[IMAG_MUL_PHI:%.*]] = phi ninf float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] -// NOINFS-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half -// NOINFS-NEXT: [[UNPROMOTION9:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half +// NOINFS-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half +// NOINFS-NEXT: [[UNPROMOTION8:%.*]] = fptrunc float [[MUL_I]] to half // NOINFS-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 // NOINFS-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 // NOINFS-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 -// NOINFS-NEXT: store half [[UNPROMOTION9]], ptr [[RETVAL_IMAGP]], align 2 +// NOINFS-NEXT: store half [[UNPROMOTION8]], ptr [[RETVAL_IMAGP]], align 2 // NOINFS-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 // NOINFS-NEXT: ret <2 x half> [[TMP0]] // @@ -1353,7 +1263,7 @@ extern __m256d extern_m256d(__m256d, ...); // CFINITEONLY: Function Attrs: noinline nounwind optnone // CFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <4 x double> @call_m256d -// CFINITEONLY-SAME: (<4 x double> noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR5:[0-9]+]] { +// 
CFINITEONLY-SAME: (<4 x double> noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR6:[0-9]+]] { // CFINITEONLY-NEXT: entry: // CFINITEONLY-NEXT: [[X_ADDR:%.*]] = alloca <4 x double>, align 32 // CFINITEONLY-NEXT: store <4 x double> [[X]], ptr [[X_ADDR]], align 32 @@ -1371,7 +1281,7 @@ extern __m256d extern_m256d(__m256d, ...); // // NONANS: Function Attrs: noinline nounwind optnone // NONANS-LABEL: define dso_local nofpclass(nan) <4 x double> @call_m256d -// NONANS-SAME: (<4 x double> noundef nofpclass(nan) [[X:%.*]]) #[[ATTR5:[0-9]+]] { +// NONANS-SAME: (<4 x double> noundef nofpclass(nan) [[X:%.*]]) #[[ATTR6:[0-9]+]] { // NONANS-NEXT: entry: // NONANS-NEXT: [[X_ADDR:%.*]] = alloca <4 x double>, align 32 // NONANS-NEXT: store <4 x double> [[X]], ptr [[X_ADDR]], align 32 @@ -1382,7 +1292,7 @@ extern __m256d extern_m256d(__m256d, ...); // // NOINFS: Function Attrs: noinline nounwind optnone // NOINFS-LABEL: define dso_local nofpclass(inf) <4 x double> @call_m256d -// NOINFS-SAME: (<4 x double> noundef nofpclass(inf) [[X:%.*]]) #[[ATTR5:[0-9]+]] { +// NOINFS-SAME: (<4 x double> noundef nofpclass(inf) [[X:%.*]]) #[[ATTR6:[0-9]+]] { // NOINFS-NEXT: entry: // NOINFS-NEXT: [[X_ADDR:%.*]] = alloca <4 x double>, align 32 // NOINFS-NEXT: store <4 x double> [[X]], ptr [[X_ADDR]], align 32 @@ -1397,7 +1307,7 @@ __m256d call_m256d(__m256d x) { // CFINITEONLY: Function Attrs: noinline nounwind optnone // CFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <25 x double> @call_matrix -// CFINITEONLY-SAME: (<25 x double> noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR6:[0-9]+]] { +// CFINITEONLY-SAME: (<25 x double> noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR7:[0-9]+]] { // CFINITEONLY-NEXT: entry: // CFINITEONLY-NEXT: [[X_ADDR:%.*]] = alloca [25 x double], align 8 // CFINITEONLY-NEXT: store <25 x double> [[X]], ptr [[X_ADDR]], align 8 @@ -1414,7 +1324,7 @@ __m256d call_m256d(__m256d x) { // // NONANS: Function Attrs: noinline nounwind optnone // NONANS-LABEL: define dso_local 
nofpclass(nan) <25 x double> @call_matrix -// NONANS-SAME: (<25 x double> noundef nofpclass(nan) [[X:%.*]]) #[[ATTR6:[0-9]+]] { +// NONANS-SAME: (<25 x double> noundef nofpclass(nan) [[X:%.*]]) #[[ATTR7:[0-9]+]] { // NONANS-NEXT: entry: // NONANS-NEXT: [[X_ADDR:%.*]] = alloca [25 x double], align 8 // NONANS-NEXT: store <25 x double> [[X]], ptr [[X_ADDR]], align 8 @@ -1424,7 +1334,7 @@ __m256d call_m256d(__m256d x) { // // NOINFS: Function Attrs: noinline nounwind optnone // NOINFS-LABEL: define dso_local nofpclass(inf) <25 x double> @call_matrix -// NOINFS-SAME: (<25 x double> noundef nofpclass(inf) [[X:%.*]]) #[[ATTR6:[0-9]+]] { +// NOINFS-SAME: (<25 x double> noundef nofpclass(inf) [[X:%.*]]) #[[ATTR7:[0-9]+]] { // NOINFS-NEXT: entry: // NOINFS-NEXT: [[X_ADDR:%.*]] = alloca [25 x double], align 8 // NOINFS-NEXT: store <25 x double> [[X]], ptr [[X_ADDR]], align 8 From 9098908bcec52a9a87cfb2d46a18d5c37ee8cd0a Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Tue, 27 Feb 2024 12:26:07 -0800 Subject: [PATCH 09/18] Addressed review comments. Fixed the warnings code in Clang.cpp and fixed LIT test range.c. --- clang/docs/UsersManual.rst | 6 ++-- clang/include/clang/Basic/LangOptions.h | 8 +++--- clang/lib/CodeGen/CGExprComplex.cpp | 7 ++--- clang/lib/Driver/ToolChains/Clang.cpp | 37 +++++++++++++------------ clang/test/Driver/range.c | 3 +- 5 files changed, 31 insertions(+), 30 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 91a601ddd73d4..3dcea542c2009 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1855,12 +1855,12 @@ floating point semantic models: precise (the default), strict, and fast. * ``basic`` Implementation of complex division and multiplication using algebraic formulas at source precision. No special handling to avoid - overflow. NaN and infinite and values are not handled. + overflow. NaN and infinite values are not handled. 
* ``improved`` Implementation of complex division using the Smith algorithm at source precision. Smith's algorithm for complex division. See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). This value offers improved handling for overflow in intermediate - calculations, but overflow may occur. NaN and infinite and values are not + calculations, but overflow may occur. NaN and infinite values are not handled in some cases. * ``full`` Implementation of complex division and multiplication using a call to runtime library functions (generally the case, but the BE might @@ -1873,7 +1873,7 @@ floating point semantic models: precise (the default), strict, and fast. cases. If the target does not have native support for a higher precision data type, an implementation for the complex operation will be used to provide improved guards against intermediate overflow, but overflow and underflow may - still occur in some cases. NaN and infinite and values are not handled. + still occur in some cases. NaN and infinite values are not handled. This is the default value. .. option:: -fcx-limited-range: diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 2de21c5f60316..e372fd7091542 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -402,13 +402,13 @@ class LangOptionsBase { /// Implementation of complex division and multiplication using a call to /// runtime library functions(generally the case, but the BE might /// sometimes replace the library call if it knows enough about the - /// potential range of the inputs). Overflow and non -finite values are + /// potential range of the inputs). Overflow and non-finite values are /// handled by the library implementation. CX_Full, /// Implementation of complex division offering an improved handling /// for overflow in intermediate calculations with no special handling for - /// NaN and infinite and values. 
+    /// NaN and infinite values.
     CX_Improved,

     /// Implementation of complex division using algebraic formulas at
@@ -417,12 +417,12 @@ class LangOptionsBase {
     /// higher precision data type, an implementation for the complex operation
     /// will be used to provide improved guards against intermediate overflow,
     /// but overflow and underflow may still occur in some cases. NaN and
-    /// infinite and values are not handled. This is the default value.
+    /// infinite values are not handled. This is the default value.
     CX_Promoted,

     /// Implementation of complex division and multiplication using
     /// algebraic formulas at source precision. No special handling to avoid
-    /// overflow.NaN and infinite and values are not handled.
+    /// overflow. NaN and infinite values are not handled.
     CX_Basic,

     /// No range rule is enabled.
     CX_None
   };
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp
index bb09a551fdfc9..500d24f0f82b6 100644
--- a/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -305,8 +305,7 @@ class ComplexExprEmitter
     case BuiltinType::Kind::Double: {
       if (TI.hasLongDoubleType())
         return CGF.getContext().getComplexType(CGF.getContext().LongDoubleTy);
-      else
-        return CGF.getContext().getComplexType(CGF.getContext().DoubleTy);
+      return CGF.getContext().getComplexType(CGF.getContext().DoubleTy);
       break;
     }
     default:
@@ -1026,8 +1025,8 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) {
   QualType ComplexElementTy = Op.Ty->castAs<ComplexType>()->getElementType();
   const BuiltinType *BT = ComplexElementTy->getAs<BuiltinType>();
   if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved ||
-      (Op.FPFeatures.getComplexRange() ==
-       LangOptions::CX_Promoted && BT->getKind() == BuiltinType::Kind::LongDouble))
+      (Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted &&
+       BT->getKind() == BuiltinType::Kind::LongDouble))
     return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi);
   else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic
|| Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c8176efb3e82a..43e15332d8f31 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2817,45 +2817,48 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, default: break; case options::OPT_fcx_limited_range: - if (!GccRangeComplexOption.empty()) { - EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-limited-range"); - } else { + if (GccRangeComplexOption.empty()) { if (Range != LangOptions::ComplexRangeKind::CX_Basic) EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), "-fcx-limited-range"); + } else { + if (GccRangeComplexOption != "-fno-cx-limited-range") + EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-limited-range"); } GccRangeComplexOption = "-fcx-limited-range"; Range = LangOptions::ComplexRangeKind::CX_Basic; break; case options::OPT_fno_cx_limited_range: - if (!GccRangeComplexOption.empty() && - (GccRangeComplexOption.compare("-fcx-limited-range") != 0 && - GccRangeComplexOption.compare("-fno-cx-fortran-rules") != 0)) - EmitComplexRangeDiag(D, GccRangeComplexOption, "-fno-cx-limited-range"); - if (GccRangeComplexOption.empty()) + if (GccRangeComplexOption.empty()) { EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), "-fno-cx-limited-range"); + } else { + if (GccRangeComplexOption.compare("-fcx-limited-range") != 0 && + GccRangeComplexOption.compare("-fno-cx-fortran-rules") != 0) + EmitComplexRangeDiag(D, GccRangeComplexOption, + "-fno-cx-limited-range"); + } GccRangeComplexOption = "-fno-cx-limited-range"; Range = LangOptions::ComplexRangeKind::CX_Full; break; case options::OPT_fcx_fortran_rules: - if (!GccRangeComplexOption.empty()) - EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-fortran-rules"); - else + if (GccRangeComplexOption.empty()) EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), 
"-fcx-fortran-rules"); + else + EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-fortran-rules"); GccRangeComplexOption = "-fcx-fortran-rules"; Range = LangOptions::ComplexRangeKind::CX_Improved; break; case options::OPT_fno_cx_fortran_rules: - if (!GccRangeComplexOption.empty() && - GccRangeComplexOption.compare("-fno-cx-limited-range") == 0) - GccRangeComplexOption = "-fno-cx-fortran-rules"; - if (!GccRangeComplexOption.empty()) - EmitComplexRangeDiag(D, GccRangeComplexOption, "-fno-cx-fortran-rules"); - else + if (GccRangeComplexOption.empty()) { EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), "-fno-cx-fortran-rules"); + } else { + if (GccRangeComplexOption != "-fno-cx-limited-range") + EmitComplexRangeDiag(D, GccRangeComplexOption, + "-fno-cx-fortran-rules"); + } GccRangeComplexOption = "-fno-cx-fortran-rules"; Range = LangOptions::ComplexRangeKind::CX_Full; break; diff --git a/clang/test/Driver/range.c b/clang/test/Driver/range.c index ca089f6b79069..1d2ec04912ba4 100644 --- a/clang/test/Driver/range.c +++ b/clang/test/Driver/range.c @@ -16,7 +16,7 @@ // RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s // RUN: %clang -### -target x86_64 -fno-cx-limited-range -fcx-limited-range \ -// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN3 %s +// RUN: -c %s 2>&1 | FileCheck --check-prefix=BASIC %s // RUN: %clang -### -target x86_64 -fno-cx-limited-range -fno-cx-fortran-rules \ // RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s @@ -151,7 +151,6 @@ // WARN1: warning: overriding '-fcx-limited-range' option with '-fcx-fortran-rules' [-Woverriding-option] // WARN2: warning: overriding '-fno-cx-limited-range' option with '-fcx-fortran-rules' [-Woverriding-option] -// WARN3: overriding '-fno-cx-limited-range' option with '-fcx-limited-range' [-Woverriding-option] // WARN4: warning: overriding '-fcx-limited-range' option with '-fno-cx-fortran-rules' [-Woverriding-option] // WARN5: warning: overriding '-fcomplex-arithmetic=basic' option with 
'-fcomplex-arithmetic=improved' [-Woverriding-option] // WARN6: warning: overriding '-fcx-limited-range' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] From 52181c774abec3e88eb8b1dd233c98480c51dbc0 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Wed, 28 Feb 2024 08:35:11 -0800 Subject: [PATCH 10/18] Added more tests to cx-complex-range.c --- clang/test/CodeGen/cx-complex-range.c | 490 +++++++++++++++++++++++++- 1 file changed, 483 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index f84aa35224102..d5eac3a43b7fa 100644 --- a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -10,9 +10,6 @@ // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ // RUN: -complex-range=improved -o - | FileCheck %s --check-prefix=IMPRVD -// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ -// RUN: -complex-range=improved -o - | FileCheck %s --check-prefix=IMPRVD - // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ // RUN: -complex-range=promoted -o - | FileCheck %s --check-prefix=PRMTD @@ -39,8 +36,8 @@ // RUN: -ffast-math -complex-range=promoted -emit-llvm -o - %s \ // RUN: | FileCheck %s --check-prefix=PRMTD -_Complex float div(_Complex float a, _Complex float b) { - // LABEL: define {{.*}} @div( +_Complex float divf(_Complex float a, _Complex float b) { + // LABEL: define {{.*}} @divf( // FULL: call {{.*}} @__divsc3 // FULL_FAST: call {{.*}} @__divsc3 // @@ -108,8 +105,8 @@ _Complex float div(_Complex float a, _Complex float b) { return a / b; } -_Complex float mul(_Complex float a, _Complex float b) { - // LABEL: define {{.*}} @mul( +_Complex float mulf(_Complex float a, _Complex float b) { + // LABEL: define {{.*}} @mulf( // FULL: call {{.*}} @__mulsc3 // // FULL_FAST: alloca { float, float } @@ -186,3 +183,482 @@ _Complex float mul(_Complex float a, _Complex float b) { return a * b; } + +_Complex 
double divd(_Complex double a, _Complex double b) { + // LABEL: define {{.*}} @divd( + // FULL: call {{.*}} @__divdc3 + // FULL_FAST: call {{.*}} @__divdc3 + // + // BASIC: fmul{{.*}}double + // BASIC-NEXT: fmul{{.*}}double + // BASIC-NEXT: fadd{{.*}}double + // BASIC-NEXT: fmul{{.*}}double + // BASIC-NEXT: fmul{{.*}}double + // BASIC-NEXT: fadd{{.*}}double + // BASIC-NEXT: fmul{{.*}}double + // BASIC-NEXT: fmul{{.*}}double + // BASIC-NEXT: fsub{{.*}}double + // BASIC-NEXT: fdiv{{.*}}double + // BASIC-NEXT: fdiv{{.*}}double + // + // IMPRVD: call{{.*}}double @llvm.fabs.f64(double {{.*}}) + // IMPRVD-NEXT: call{{.*}}double @llvm.fabs.f64(double {{.*}}) + // IMPRVD-NEXT: fcmp{{.*}}ugt double {{.*}}, {{.*}} + // IMPRVD-NEXT: br i1 {{.*}}, label + // IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: + // IMPRVD-NEXT: fdiv{{.*}}double + // IMPRVD-NEXT: fmul{{.*}}double + // IMPRVD-NEXT: fadd{{.*}}double + // IMPRVD-NEXT: fmul{{.*}}double + // IMPRVD-NEXT: fadd{{.*}}double + // IMPRVD-NEXT: fdiv{{.*}}double + // IMPRVD-NEXT: fmul{{.*}}double + // IMPRVD-NEXT: fsub{{.*}}double + // IMPRVD-NEXT: fdiv{{.*}}double + // IMPRVD-NEXT: br label + // IMPRVD: abs_rhsr_less_than_abs_rhsi: + // IMPRVD-NEXT: fdiv{{.*}}double + // IMPRVD-NEXT: fmul{{.*}}double + // IMPRVD-NEXT: fadd{{.*}}double + // IMPRVD-NEXT: fmul{{.*}}double + // IMPRVD-NEXT: fadd{{.*}}double + // IMPRVD-NEXT: fdiv{{.*}}double + // IMPRVD-NEXT: fmul{{.*}}double + // IMPRVD-NEXT: fsub{{.*}}double + // IMPRVD-NEXT: fdiv{{.*}}double + // + // PRMTD: fpext double {{.*}} to x86_fp80 + // PRMTD-NEXT: fpext double {{.*}} to x86_fp80 + // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load double, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load double, ptr {{.*}} + // PRMTD-NEXT: fpext double {{.*}} to x86_fp80 + // PRMTD-NEXT: fpext double {{.*}} to x86_fp80 + // PRMTD-NEXT: call{{.*}}x86_fp80 
@llvm.fabs.f80(x86_fp80 {{.*}}) + // PRMTD-NEXT: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) + // PRMTD-NEXT: fcmp{{.*}}ugt x86_fp80 {{.*}},{{.*}} + // PRMTD-NEXT: br i1 {{.*}} + // PRMTD: abs_rhsr_greater_or_equal_abs_rhsi: + // PRMTD-NEXT: fdiv{{.*}} x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fadd{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fadd{{.*}}x86_fp80 + // PRMTD-NEXT: fdiv{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fsub{{.*}}x86_fp80 + // PRMTD-NEXT: fdiv{{.*}}x86_fp80 + // PRMTD-NEXT: br label + // PRMTD: abs_rhsr_less_than_abs_rhsi: + // PRMTD-NEXT: fdiv{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fadd{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fadd{{.*}}x86_fp80 + // PRMTD-NEXT: fdiv{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fsub{{.*}}x86_fp80 + // PRMTD-NEXT: fdiv {{.*}}x86_fp80 + + return a / b; +} + +_Complex double muld(_Complex double a, _Complex double b) { + // LABEL: define {{.*}} @muld( + // FULL: call {{.*}} @__muldc3 + // + // FULL_FAST: alloca { double, double } + // FULL_FAST-NEXT: alloca { double, double } + // FULL_FAST-NEXT: alloca { double, double } + // FULL_FAST: load double, ptr {{.*}} + // FULL_FAST-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 + // FULL_FAST-NEXT: load double, ptr {{.*}} + // FULL_FAST-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 + // FULL_FAST-NEXT: load double, ptr {{.*}} + // FULL_FAST-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 + // FULL_FAST-NEXT: load double + // FULL_FAST-NEXT: fmul{{.*}}double + // FULL_FAST-NEXT: fmul{{.*}}double + // FULL_FAST-NEXT: fmul{{.*}}double + // FULL_FAST-NEXT: fmul{{.*}}double + // FULL_FAST-NEXT: fsub{{.*}}double + // FULL_FAST-NEXT: fadd{{.*}}double + + // BASIC: alloca { double, double } + // BASIC-NEXT: alloca { double, double } + // BASIC-NEXT: 
alloca { double, double } + // BASIC: load double, ptr {{.*}} + // BASIC-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 + // BASIC-NEXT: load double, ptr {{.*}} + // BASIC-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 + // BASIC-NEXT: load double, ptr {{.*}} + // BASIC-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 + // BASIC-NEXT: load double + // BASIC-NEXT: fmul{{.*}}double + // BASIC-NEXT: fmul{{.*}}double + // BASIC-NEXT: fmul{{.*}}double + // BASIC-NEXT: fmul{{.*}}double + // BASIC-NEXT: fsub{{.*}}double + // BASIC-NEXT: fadd{{.*}}double + // + // IMPRVD: alloca { double, double } + // IMPRVD-NEXT: alloca { double, double } + // IMPRVD-NEXT: alloca { double, double } + // IMPRVD: load double, ptr {{.*}} + // IMPRVD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 + // IMPRVD-NEXT: load double, ptr {{.*}} + // IMPRVD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 + // IMPRVD-NEXT: load double, ptr {{.*}} + // IMPRVD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 + // IMPRVD-NEXT: load double + // IMPRVD-NEXT: fmul{{.*}}double + // IMPRVD-NEXT: fmul{{.*}}double + // IMPRVD-NEXT: fmul{{.*}}double + // IMPRVD-NEXT: fmul{{.*}}double + // IMPRVD-NEXT: fsub{{.*}}double + // IMPRVD-NEXT: fadd{{.*}}double + // + // PRMTD: alloca { double, double } + // PRMTD-NEXT: alloca { double, double } + // PRMTD-NEXT: alloca { double, double } + // PRMTD: load double, ptr + // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load double, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load double, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load{{.*}}double + // PRMTD-NEXT: fmul{{.*}}double + // PRMTD-NEXT: fmul{{.*}}double + // PRMTD-NEXT: 
fmul{{.*}}double + // PRMTD-NEXT: fmul{{.*}}double + // PRMTD-NEXT: fsub{{.*}}double + // PRMTD-NEXT: fadd{{.*}}double + + return a * b; +} + +_Complex _Float16 divf16(_Complex _Float16 a, _Complex _Float16 b) { + // LABEL: define {{.*}} @divf16( + + // FULL: call {{.*}} @__divsc3 + // FULL_FAST: call {{.*}} @__divsc3 + // + // BASIC: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fadd{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fadd{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fsub{{.*}}float + // BASIC-NEXT: fdiv{{.*}}float + // BASIC-NEXT: fdiv{{.*}}float + // + // IMPRVD: call{{.*}}float @llvm.fabs.f32(float {{.*}}) + // IMPRVD-NEXT: call{{.*}}float @llvm.fabs.f32(float {{.*}}) + // IMPRVD-NEXT: fcmp{{.*}}ugt float {{.*}}, {{.*}} + // IMPRVD-NEXT: br i1 {{.*}}, label + // IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: + // IMPRVD-NEXT: fdiv{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fadd{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fadd{{.*}}float + // IMPRVD-NEXT: fdiv{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fsub{{.*}}float + // IMPRVD-NEXT: fdiv{{.*}}float + // IMPRVD-NEXT: br label + // IMPRVD: abs_rhsr_less_than_abs_rhsi: + // IMPRVD-NEXT: fdiv{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fadd{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fadd{{.*}}float + // IMPRVD-NEXT: fdiv{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fsub{{.*}}float + // IMPRVD-NEXT: fdiv{{.*}}float + // + // PRMTD: load half, ptr {{.*}} + // PRMTD: fpext half {{.*}} to float + // PRMTD-NEXT: fpext half {{.*}} to float + // PRMTD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load half, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load half, ptr {{.*}} + // 
PRMTD-NEXT: fpext half {{.*}} to float + // PRMTD-NEXT: fpext half {{.*}} to float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fadd{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fadd{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fsub{{.*}}float + // PRMTD-NEXT: fdiv{{.*}}float + // PRMTD-NEXT: fdiv{{.*}}float + // PRMTD-NEXT: fptrunc float {{.*}} to half + // PRMTD-NEXT: fptrunc float {{.*}} to half + + return a / b; +} + +_Complex _Float16 mulf16(_Complex _Float16 a, _Complex _Float16 b) { + // LABEL: define {{.*}} @mulf16( + // FULL: call {{.*}} @__mulsc3 + // + // FULL_FAST: alloca { half, half } + // FULL_FAST-NEXT: alloca { half, half } + // FULL_FAST-NEXT: alloca { half, half } + // FULL_FAST: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 + // FULL_FAST-NEXT: load half, ptr {{.*}} + // FULL_FAST-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 + // FULL_FAST-NEXT: load half, ptr {{.*}} + // FULL_FAST-NEXT: fpext half {{.*}} to float + // FULL_FAST-NEXT: fpext half {{.*}} to float + // FULL_FAST-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 + // FULL_FAST-NEXT: load half, ptr {{.*}} + // FULL_FAST-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 + // FULL_FAST-NEXT: load half + // FULL_FAST-NEXT: fpext half {{.*}} to float + // FULL_FAST-NEXT: fpext half {{.*}} to float + // FULL_FAST-NEXT: fmul{{.*}}float + // FULL_FAST-NEXT: fmul{{.*}}float + // FULL_FAST-NEXT: fmul{{.*}}float + // FULL_FAST-NEXT: fmul{{.*}}float + // FULL_FAST-NEXT: fsub{{.*}}float + // FULL_FAST-NEXT: fadd{{.*}}float + // FULL_FAST-NEXT: fptrunc float {{.*}} to half + // FULL_FAST-NEXT: fptrunc float {{.*}} to half + + // BASIC: alloca { half, half } + // BASIC-NEXT: alloca { half, half } + // BASIC: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 + // 
BASIC-NEXT: load half, ptr {{.*}} + // BASIC-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 + // BASIC-NEXT: load half, ptr {{.*}} + // BASIC-NEXT: fpext half {{.*}} to float + // BASIC-NEXT: fpext half {{.*}} to float + // BASIC-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 + // BASIC-NEXT: load half, ptr {{.*}} + // BASIC-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 + // BASIC-NEXT: load half + // BASIC-NEXT: fpext half {{.*}} to float + // BASIC-NEXT: fpext half {{.*}} to float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fmul{{.*}}float + // BASIC-NEXT: fsub{{.*}}float + // BASIC-NEXT: fadd{{.*}}float + // BASIC-NEXT: fptrunc float {{.*}} to half + // BASIC-NEXT: fptrunc float {{.*}} to half + // + // IMPRVD: alloca { half, half } + // IMPRVD-NEXT: alloca { half, half } + // IMPRVD-NEXT: alloca { half, half } + // IMPRVD: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 + // IMPRVD-NEXT: load half, ptr {{.*}} + // IMPRVD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 + // IMPRVD-NEXT: load half, ptr {{.*}} + // IMPRVD-NEXT: fpext half {{.*}} to float + // IMPRVD-NEXT: fpext half {{.*}} to float + // IMPRVD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 + // IMPRVD-NEXT: load half, ptr {{.*}} + // IMPRVD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 + // IMPRVD-NEXT: load half + // IMPRVD-NEXT: fpext half {{.*}} to float + // IMPRVD-NEXT: fpext half {{.*}} to float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fmul{{.*}}float + // IMPRVD-NEXT: fsub{{.*}}float + // IMPRVD-NEXT: fadd{{.*}}float + // IMPRVD-NEXT: fptrunc float {{.*}} to half + // IMPRVD-NEXT: fptrunc float {{.*}} to half + + // PRMTD: alloca { half, half } + // PRMTD-NEXT: alloca { half, half } + // 
PRMTD-NEXT: alloca { half, half } + // PRMTD: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load half, ptr + // PRMTD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load half, ptr {{.*}} + // PRMTD-NEXT: fpext half {{.*}} to float + // PRMTD-NEXT: fpext half {{.*}} to float + // PRMTD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load half, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load{{.*}}half + // PRMTD-NEXT: fpext half {{.*}} to float + // PRMTD-NEXT: fpext half {{.*}} to float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fmul{{.*}}float + // PRMTD-NEXT: fsub{{.*}}float + // PRMTD-NEXT: fadd{{.*}}float + // PRMTD-NEXT: fptrunc float {{.*}} to half + // PRMTD-NEXT: fptrunc float {{.*}} to half + + return a * b; +} + +_Complex long double divld(_Complex long double a, _Complex long double b) { + // LABEL: define {{.*}} @divld( + // FULL: call {{.*}} @__divxc3 + // FULL_FAST: call {{.*}} @__divxc3 + // + // BASIC: fmul{{.*}}x86_fp80 + // BASIC-NEXT: fmul{{.*}}x86_fp80 + // BASIC-NEXT: fadd{{.*}}x86_fp80 + // BASIC-NEXT: fmul{{.*}}x86_fp80 + // BASIC-NEXT: fmul{{.*}}x86_fp80 + // BASIC-NEXT: fadd{{.*}}x86_fp80 + // BASIC-NEXT: fmul{{.*}}x86_fp80 + // BASIC-NEXT: fmul{{.*}}x86_fp80 + // BASIC-NEXT: fsub{{.*}}x86_fp80 + // BASIC-NEXT: fdiv{{.*}}x86_fp80 + // BASIC-NEXT: fdiv{{.*}}x86_fp80 + // + // IMPRVD: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) + // IMPRVD-NEXT: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) + // IMPRVD-NEXT: fcmp{{.*}}ugt x86_fp80 {{.*}}, {{.*}} + // IMPRVD-NEXT: br i1 {{.*}}, label + // IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: + // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 + // IMPRVD-NEXT: fmul{{.*}}x86_fp80 + // IMPRVD-NEXT: fadd{{.*}}x86_fp80 + // IMPRVD-NEXT: fmul{{.*}}x86_fp80 + // 
IMPRVD-NEXT: fadd{{.*}}x86_fp80 + // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 + // IMPRVD-NEXT: fmul{{.*}}x86_fp80 + // IMPRVD-NEXT: fsub{{.*}}x86_fp80 + // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 + // IMPRVD-NEXT: br label + // IMPRVD: abs_rhsr_less_than_abs_rhsi: + // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 + // IMPRVD-NEXT: fmul{{.*}}x86_fp80 + // IMPRVD-NEXT: fadd{{.*}}x86_fp80 + // IMPRVD-NEXT: fmul{{.*}}x86_fp80 + // IMPRVD-NEXT: fadd{{.*}}x86_fp80 + // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 + // IMPRVD-NEXT: fmul{{.*}}x86_fp80 + // IMPRVD-NEXT: fsub{{.*}}x86_fp80 + // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 + // + // PRMTD: alloca { x86_fp80, x86_fp80 } + // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load x86_fp80, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load x86_fp80, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load x86_fp80, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 } + // PRMTD-NEXT: load x86_fp80, ptr {{.*}} + // PRMTD-NEXT: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) + // PRMTD-NEXT: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) + // PRMTD-NEXT: fcmp{{.*}}ugt x86_fp80 {{.*}},{{.*}} + // PRMTD-NEXT: br i1 {{.*}}, label {{.*}}, label {{.*}} + // PRMTD: abs_rhsr_greater_or_equal_abs_rhsi: + // PRMTD-NEXT: fdiv{{.*}} x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fadd{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fadd{{.*}}x86_fp80 + // PRMTD-NEXT: fdiv{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fsub{{.*}}x86_fp80 + // PRMTD-NEXT: fdiv{{.*}}x86_fp80 + // PRMTD-NEXT: br label + // PRMTD: abs_rhsr_less_than_abs_rhsi: + // PRMTD-NEXT: fdiv{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fadd{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fadd{{.*}}x86_fp80 
+ // PRMTD-NEXT: fdiv{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fsub{{.*}}x86_fp80 + // PRMTD-NEXT: fdiv {{.*}}x86_fp80 + + return a / b; +} + +_Complex long double mulld(_Complex long double a, _Complex long double b) { + // LABEL: define {{.*}} @mulld( + // FULL: call {{.*}} @__mulxc3 + + // FULL_FAST: alloca { x86_fp80, x86_fp80 } + // FULL_FAST-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 + // FULL_FAST-NEXT: load x86_fp80, ptr {{.*}} + // FULL_FAST-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 + // FULL_FAST-NEXT: load x86_fp80, ptr {{.*}} + // FULL_FAST-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 + // FULL_FAST-NEXT: load x86_fp80 + // FULL_FAST-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 + // FULL_FAST-NEXT: load x86_fp80, ptr {{.*}} + // FULL_FAST-NEXT: fmul{{.*}}x86_fp80 + // FULL_FAST-NEXT: fmul{{.*}}x86_fp80 + // FULL_FAST-NEXT: fmul{{.*}}x86_fp80 + // FULL_FAST-NEXT: fmul{{.*}}x86_fp80 + // FULL_FAST-NEXT: fsub{{.*}}x86_fp80 + // FULL_FAST-NEXT: fadd{{.*}}x86_fp80 + + // BASIC: alloca { x86_fp80, x86_fp80 } + // BASIC-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 + // BASIC-NEXT: load x86_fp80, ptr {{.*}} + // BASIC-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 + // BASIC-NEXT: load x86_fp80, ptr {{.*}} + // BASIC-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 + // BASIC-NEXT: load x86_fp80 + // BASIC-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 + // BASIC-NEXT: load x86_fp80, ptr {{.*}} + // BASIC-NEXT: fmul{{.*}}x86_fp80 + // BASIC-NEXT: fmul{{.*}}x86_fp80 + // BASIC-NEXT: fmul{{.*}}x86_fp80 + // BASIC-NEXT: fmul{{.*}}x86_fp80 + // BASIC-NEXT: fsub{{.*}}x86_fp80 + // BASIC-NEXT: fadd{{.*}}x86_fp80 + // + // IMPRVD: alloca { x86_fp80, x86_fp80 } + // IMPRVD-NEXT: 
getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 + // IMPRVD-NEXT: load x86_fp80, ptr {{.*}} + // IMPRVD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 + // IMPRVD-NEXT: load x86_fp80, ptr {{.*}} + // IMPRVD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 + // IMPRVD-NEXT: load x86_fp80 + // IMPRVD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 + // IMPRVD-NEXT: load x86_fp80, ptr {{.*}} + // IMPRVD-NEXT: fmul{{.*}}x86_fp80 + // IMPRVD-NEXT: fmul{{.*}}x86_fp80 + // IMPRVD-NEXT: fmul{{.*}}x86_fp80 + // IMPRVD-NEXT: fmul{{.*}}x86_fp80 + // IMPRVD-NEXT: fsub{{.*}}x86_fp80 + // IMPRVD-NEXT: fadd{{.*}}x86_fp80 + // + // PRMTD: alloca { x86_fp80, x86_fp80 } + // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load x86_fp80, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load x86_fp80, ptr {{.*}} + // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 + // PRMTD-NEXT: load{{.*}}x86_fp80 + // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 + // PRMTD-NEXT: load x86_fp80, ptr {{.*}} + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fmul{{.*}}x86_fp80 + // PRMTD-NEXT: fsub{{.*}}x86_fp80 + // PRMTD-NEXT: fadd{{.*}}x86_fp80 + + return a * b; +} From a9449de4d844a8f843521e1a9a2195afe73d19fb Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Thu, 29 Feb 2024 04:48:51 -0800 Subject: [PATCH 11/18] Set the default value of option to full. 
--- clang/docs/UsersManual.rst | 3 +-- clang/include/clang/Basic/LangOptions.h | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 3dcea542c2009..a68a772107f51 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1867,14 +1867,13 @@ floating point semantic models: precise (the default), strict, and fast. sometimes replace the library call if it knows enough about the potential range of the inputs). Overflow and non-finite values are handled by the library implementation. For the case of multiplication overflow will occur in - accordance with normal floating-point rules. + accordance with normal floating-point rules. This is the default value. * ``promoted`` Implementation of complex division using algebraic formulas at higher precision. Overflow is handled. Non-finite values are handled in some cases. If the target does not have native support for a higher precision data type, an implementation for the complex operation will be used to provide improved guards against intermediate overflow, but overflow and underflow may still occur in some cases. NaN and infinite values are not handled. - This is the default value. .. option:: -fcx-limited-range: diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index e372fd7091542..e39ddc330473b 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -403,7 +403,7 @@ class LangOptionsBase { /// runtime library functions(generally the case, but the BE might /// sometimes replace the library call if it knows enough about the /// potential range of the inputs). Overflow and non-finite values are - /// handled by the library implementation. + /// handled by the library implementation. This is the default value. 
CX_Full, /// Implementation of complex division offering an improved handling @@ -417,7 +417,7 @@ class LangOptionsBase { /// higher precision data type, an implementation for the complex operation /// will be used to provide improved guards against intermediate overflow, /// but overflow and underflow may still occur in some cases. NaN and - /// infinite values are not handled. This is the default value. + /// infinite values are not handled. CX_Promoted, /// Implementation of complex division and multiplication using From e20741e708175674e609bddee3305ce1d4a8e4f9 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Mon, 4 Mar 2024 13:09:45 -0800 Subject: [PATCH 12/18] Wrote a more general function to deal with next larger types and added a warning. --- clang/docs/UsersManual.rst | 6 +- .../clang/Basic/DiagnosticCommonKinds.td | 5 ++ clang/lib/CodeGen/CGExprComplex.cpp | 58 ++++++++++--------- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index a68a772107f51..e9fed48d8c00e 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1871,9 +1871,9 @@ floating point semantic models: precise (the default), strict, and fast. * ``promoted`` Implementation of complex division using algebraic formulas at higher precision. Overflow is handled. Non-finite values are handled in some cases. If the target does not have native support for a higher precision - data type, an implementation for the complex operation will be used to provide - improved guards against intermediate overflow, but overflow and underflow may - still occur in some cases. NaN and infinite values are not handled. + data type, the implementation for the complex operation using the Smith + algorithm will be used. Overflow may still occur in some cases. NaN and + infinite values are not handled. .. 
option:: -fcx-limited-range: diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index 08bb1d81ba29f..70323801ac3ba 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -45,6 +45,11 @@ def note_using : Note<"using">; def note_possibility : Note<"one possibility">; def note_also_found : Note<"also found">; +def warn_next_larger_fp_type_same_size_than_fp : Warning< + "higher precision floating-point type has the same size as the " + "floating-point type">, + InGroup>; + // Parse && Lex let CategoryName = "Lexical or Preprocessor Issue" in { diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 500d24f0f82b6..c1b5ab5910d0f 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -283,36 +283,38 @@ class ComplexExprEmitter ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName, const BinOpInfo &Op); + QualType GetHigherPrecisionFPType(QualType ElementType) { + const auto *CurrentBT = dyn_cast(ElementType); + switch (CurrentBT->getKind()) { + case BuiltinType::Kind::Float16: + return CGF.getContext().FloatTy; + case BuiltinType::Kind::Float: + case BuiltinType::Kind::BFloat16: + return CGF.getContext().DoubleTy; + case BuiltinType::Kind::Double: + return CGF.getContext().LongDoubleTy; + default: + return ElementType; + } + } + QualType HigherPrecisionTypeForComplexArithmetic(QualType ElementType, bool IsDivOpCode) { - const TargetInfo &TI = CGF.getContext().getTargetInfo(); - const LangOptions Opts = CGF.getLangOpts(); - if (const auto *BT = dyn_cast(ElementType)) { - switch (BT->getKind()) { - case BuiltinType::Kind::Float16: { - if (TI.hasFloat16Type() && !TI.hasLegalHalfType()) - return CGF.getContext().getComplexType(CGF.getContext().FloatTy); - break; - } - case BuiltinType::Kind::BFloat16: { - if (TI.hasBFloat16Type() && 
!TI.hasFullBFloat16Type()) - return CGF.getContext().getComplexType(CGF.getContext().FloatTy); - break; - } - case BuiltinType::Kind::Float: - return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); - break; - case BuiltinType::Kind::Double: { - if (TI.hasLongDoubleType()) - return CGF.getContext().getComplexType(CGF.getContext().LongDoubleTy); - return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); - break; - } - default: - return QualType(); - } + QualType HigherElementType = GetHigherPrecisionFPType(ElementType); + const llvm::fltSemantics &ElementTypeSemantics = + CGF.getContext().getFloatTypeSemantics(ElementType); + const llvm::fltSemantics &HigherElementTypeSemantics = + CGF.getContext().getFloatTypeSemantics(HigherElementType); + const llvm::Triple TI = CGF.getTarget().getTriple(); + if ((llvm::APFloat::getSizeInBits(HigherElementTypeSemantics) > + llvm::APFloat::getSizeInBits(ElementTypeSemantics)) && + !CGF.getTarget().getTriple().isOSWindows()) { + return CGF.getContext().getComplexType(HigherElementType); + } else { + DiagnosticsEngine &Diags = CGF.CGM.getDiags(); + Diags.Report(diag::warn_next_larger_fp_type_same_size_than_fp); + return CGF.getContext().getComplexType(ElementType); } - return QualType(); } QualType getPromotionType(QualType Ty, bool IsDivOpCode = false) { @@ -1026,7 +1028,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { const BuiltinType *BT = ComplexElementTy->getAs(); if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || (Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted && - BT->getKind() == BuiltinType::Kind::LongDouble)) + BT->getKind() == BuiltinType::Kind::Double)) return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi); else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) From 0d97b9bf62ad23001a95fe40c0a4dc505f381ca2 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Mon, 11 Mar 
2024 13:11:24 -0700 Subject: [PATCH 13/18] Addressed review comments. --- clang/lib/CodeGen/CGExprComplex.cpp | 17 ++++++++--------- clang/test/CodeGen/cx-complex-range.c | 21 +++++---------------- 2 files changed, 13 insertions(+), 25 deletions(-) diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index c1b5ab5910d0f..4ca1c28afec97 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -47,11 +47,12 @@ class ComplexExprEmitter CGBuilderTy &Builder; bool IgnoreReal; bool IgnoreImag; + LangOptions::ComplexRangeKind FPHasBeenPromoted; + public: ComplexExprEmitter(CodeGenFunction &cgf, bool ir=false, bool ii=false) - : CGF(cgf), Builder(CGF.Builder), IgnoreReal(ir), IgnoreImag(ii) { - } - + : CGF(cgf), Builder(CGF.Builder), IgnoreReal(ir), IgnoreImag(ii), + FPHasBeenPromoted(LangOptions::ComplexRangeKind::CX_None) {} //===--------------------------------------------------------------------===// // Utilities @@ -305,12 +306,11 @@ class ComplexExprEmitter CGF.getContext().getFloatTypeSemantics(ElementType); const llvm::fltSemantics &HigherElementTypeSemantics = CGF.getContext().getFloatTypeSemantics(HigherElementType); - const llvm::Triple TI = CGF.getTarget().getTriple(); - if ((llvm::APFloat::getSizeInBits(HigherElementTypeSemantics) > - llvm::APFloat::getSizeInBits(ElementTypeSemantics)) && - !CGF.getTarget().getTriple().isOSWindows()) { + if (llvm::APFloat::semanticsMaxExponent(ElementTypeSemantics) * 2 + 1 <= + llvm::APFloat::semanticsMaxExponent(HigherElementTypeSemantics)) { return CGF.getContext().getComplexType(HigherElementType); } else { + FPHasBeenPromoted = LangOptions::ComplexRangeKind::CX_Improved; DiagnosticsEngine &Diags = CGF.CGM.getDiags(); Diags.Report(diag::warn_next_larger_fp_type_same_size_than_fp); return CGF.getContext().getComplexType(ElementType); @@ -1025,10 +1025,9 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { if (!LHSi) LHSi = 
llvm::Constant::getNullValue(RHSi->getType()); QualType ComplexElementTy = Op.Ty->castAs()->getElementType(); - const BuiltinType *BT = ComplexElementTy->getAs(); if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || (Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted && - BT->getKind() == BuiltinType::Kind::Double)) + FPHasBeenPromoted == LangOptions::CX_Improved)) return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi); else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index d5eac3a43b7fa..035129779fc69 100644 --- a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -227,6 +227,7 @@ _Complex double divd(_Complex double a, _Complex double b) { // IMPRVD-NEXT: fsub{{.*}}double // IMPRVD-NEXT: fdiv{{.*}}double // + // PRMTD: load double, ptr {{.*}} // PRMTD: fpext double {{.*}} to x86_fp80 // PRMTD-NEXT: fpext double {{.*}} to x86_fp80 // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 @@ -235,31 +236,19 @@ _Complex double divd(_Complex double a, _Complex double b) { // PRMTD-NEXT: load double, ptr {{.*}} // PRMTD-NEXT: fpext double {{.*}} to x86_fp80 // PRMTD-NEXT: fpext double {{.*}} to x86_fp80 - // PRMTD-NEXT: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) - // PRMTD-NEXT: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) - // PRMTD-NEXT: fcmp{{.*}}ugt x86_fp80 {{.*}},{{.*}} - // PRMTD-NEXT: br i1 {{.*}} - // PRMTD: abs_rhsr_greater_or_equal_abs_rhsi: - // PRMTD-NEXT: fdiv{{.*}} x86_fp80 // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fadd{{.*}}x86_fp80 // PRMTD-NEXT: fmul{{.*}}x86_fp80 // PRMTD-NEXT: fadd{{.*}}x86_fp80 - // PRMTD-NEXT: fdiv{{.*}}x86_fp80 // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fsub{{.*}}x86_fp80 - // PRMTD-NEXT: fdiv{{.*}}x86_fp80 - // PRMTD-NEXT: br label - // PRMTD: 
abs_rhsr_less_than_abs_rhsi: - // PRMTD-NEXT: fdiv{{.*}}x86_fp80 // PRMTD-NEXT: fmul{{.*}}x86_fp80 // PRMTD-NEXT: fadd{{.*}}x86_fp80 // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fadd{{.*}}x86_fp80 - // PRMTD-NEXT: fdiv{{.*}}x86_fp80 // PRMTD-NEXT: fmul{{.*}}x86_fp80 // PRMTD-NEXT: fsub{{.*}}x86_fp80 - // PRMTD-NEXT: fdiv {{.*}}x86_fp80 + // PRMTD-NEXT: fdiv{{.*}}x86_fp80 + // PRMTD-NEXT: fdiv{{.*}}x86_fp80 + // PRMTD-NEXT: fptrunc x86_fp80 {{.*}} to double + // PRMTD-NEXT: fptrunc x86_fp80 {{.*}} to double return a / b; } From bc3fa4fbb180451d8ad76c0beef213d1b25ec427 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Mon, 11 Mar 2024 13:17:52 -0700 Subject: [PATCH 14/18] Fix format. --- clang/lib/CodeGen/CGExprComplex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 4ca1c28afec97..e46448e3ab516 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -50,7 +50,7 @@ class ComplexExprEmitter LangOptions::ComplexRangeKind FPHasBeenPromoted; public: - ComplexExprEmitter(CodeGenFunction &cgf, bool ir=false, bool ii=false) + ComplexExprEmitter(CodeGenFunction &cgf, bool ir = false, bool ii = false) : CGF(cgf), Builder(CGF.Builder), IgnoreReal(ir), IgnoreImag(ii), FPHasBeenPromoted(LangOptions::ComplexRangeKind::CX_None) {} From 3117dbd271862ce8975aede2ee05b9ee77ede5cc Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Fri, 15 Mar 2024 11:53:44 -0700 Subject: [PATCH 15/18] Addressed some of the review comments and working on the rest. 
--- clang/include/clang/Driver/Options.td | 28 +++++++++++---------------- clang/lib/CodeGen/CGExprComplex.cpp | 14 ++++++++++---- clang/lib/Driver/ToolChains/Clang.cpp | 6 ++++-- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0bb57a7881db2..5540baef06bfa 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1057,23 +1057,17 @@ def complex_range_EQ : Joined<["-"], "complex-range=">, Group, NormalizedValues<["CX_Full", "CX_Improved", "CX_Promoted", "CX_Basic"]>, MarshallingInfoEnum, "CX_Full">; -def fcx_limited_range : Flag<["-"], "fcx-limited-range">, - Group, Visibility<[ClangOption, CC1Option]>, - HelpText<"Basic algebraic expansions of complex arithmetic operations " - "involving are enabled.">; - -def fno_cx_limited_range : Flag<["-"], "fno-cx-limited-range">, - Group, Visibility<[ClangOption, CC1Option]>, - HelpText<"Basic algebraic expansions of complex arithmetic operations " - "involving are disabled.">; - -def fcx_fortran_rules : Flag<["-"], "fcx-fortran-rules">, - Group, Visibility<[ClangOption, CC1Option]>, - HelpText<"Range reduction is enabled for complex arithmetic operations.">; - -def fno_cx_fortran_rules : Flag<["-"], "fno-cx-fortran-rules">, - Group, Visibility<[ClangOption, CC1Option]>, - HelpText<"Range reduction is disabled for complex arithmetic operations.">; +defm cx_limited_range: BoolOptionWithoutMarshalling<"f", "cx-limited-range", + PosFlag, + NegFlag>; + +defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules", + PosFlag, + NegFlag>; // OpenCL-only Options def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group, diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 87adf20236351..35fe1ba5b51fa 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -310,6 +310,13 @@ class ComplexExprEmitter 
CGF.getContext().getFloatTypeSemantics(ElementType); const llvm::fltSemantics &HigherElementTypeSemantics = CGF.getContext().getFloatTypeSemantics(HigherElementType); + // Check that LongDouble Size > Double Size. + // This can be interpreted as: + // SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal <= + // LargerType.LargestFiniteVal. + // In terms of exponent it gives this formula: + // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal + // doubles the exponent of SmallerType.LargestFiniteVal); if (llvm::APFloat::semanticsMaxExponent(ElementTypeSemantics) * 2 + 1 <= llvm::APFloat::semanticsMaxExponent(HigherElementTypeSemantics)) { return CGF.getContext().getComplexType(HigherElementType); @@ -1036,10 +1043,9 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) return EmitAlgebraicDiv(LHSr, LHSi, RHSr, RHSi); - else if (!CGF.getLangOpts().FastMath || - // '-ffast-math' is used in the command line but followed by an - // '-fno-cx-limited-range' or '-fcomplex-arithmetic=full'. - Op.FPFeatures.getComplexRange() == LangOptions::CX_Full) { + // '-ffast-math' is used in the command line but followed by an + // '-fno-cx-limited-range' or '-fcomplex-arithmetic=full'. + else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Full) { LHSi = OrigLHSi; // If we have a complex operand on the RHS and FastMath is not allowed, we // delegate to a libcall to handle all of the complexities and minimize diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2e6acad112234..e0164d9fed7fd 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2794,8 +2794,10 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // ffast-math enables basic range rules for complex multiplication and // division. 
// Warn if user expects to perform full implementation of complex - // multiplication or division in the presence of nnan or ninf flags. - if (Range == LangOptions::ComplexRangeKind::CX_Full) + // multiplication or division in the presence of nan or ninf flags. + if (Range == LangOptions::ComplexRangeKind::CX_Full || + Range == LangOptions::ComplexRangeKind::CX_Improved || + Range == LangOptions::ComplexRangeKind::CX_Promoted) EmitComplexRangeDiag( D, ComplexArithmeticStr(Range), !GccRangeComplexOption.empty() From 0a08598dfda3b64c6b358bc2d2c05cdc9770ced8 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Fri, 15 Mar 2024 14:00:27 -0700 Subject: [PATCH 16/18] Addressed a few more comments. The last issue still WIP is the variable FPHasBeenPromoted. --- clang/lib/CodeGen/CGExprComplex.cpp | 11 +- clang/lib/Driver/ToolChains/Clang.cpp | 8 +- clang/test/CodeGen/cx-complex-range.c | 26 +--- clang/test/CodeGen/nofpclass.c | 164 ++++++++++++++++++++------ clang/test/Driver/range.c | 39 +++--- 5 files changed, 154 insertions(+), 94 deletions(-) diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 35fe1ba5b51fa..7dddb9fcfd06f 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -310,13 +310,13 @@ class ComplexExprEmitter CGF.getContext().getFloatTypeSemantics(ElementType); const llvm::fltSemantics &HigherElementTypeSemantics = CGF.getContext().getFloatTypeSemantics(HigherElementType); - // Check that LongDouble Size > Double Size. - // This can be interpreted as: - // SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal <= + // Check that the promoted type can handle the intermediate values without + // overflowing. This can be interpreted as: + // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal) * 2 <= // LargerType.LargestFiniteVal. 
// In terms of exponent it gives this formula: // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal - // doubles the exponent of SmallerType.LargestFiniteVal); + // doubles the exponent of SmallerType.LargestFiniteVal) if (llvm::APFloat::semanticsMaxExponent(ElementTypeSemantics) * 2 + 1 <= llvm::APFloat::semanticsMaxExponent(HigherElementTypeSemantics)) { return CGF.getContext().getComplexType(HigherElementType); @@ -843,8 +843,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || - Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted || - CGF.getLangOpts().NoHonorInfs || CGF.getLangOpts().NoHonorNaNs) + Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) return ComplexPairTy(ResR, ResI); // Emit the test for the real part becoming NaN and create a branch to diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e0164d9fed7fd..671a10aefe2eb 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2875,12 +2875,10 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, RangeVal = LangOptions::ComplexRangeKind::CX_Promoted; else if (Val.equals("basic")) RangeVal = LangOptions::ComplexRangeKind::CX_Basic; - else + else { D.Diag(diag::err_drv_unsupported_option_argument) - << A->getSpelling() << LangOptions::ComplexRangeKind::CX_None; - if (GccRangeComplexOption.empty() && !SeenUnsafeMathModeOption) { - EmitComplexRangeDiag(D, ComplexArithmeticStr(Range), - ComplexArithmeticStr(RangeVal)); + << A->getSpelling() << Val; + break; } if (!GccRangeComplexOption.empty()) { if (GccRangeComplexOption.compare("-fcx-limited-range") != 0) { diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index 035129779fc69..516220a0dbdfe 100644 --- 
a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -402,30 +402,8 @@ _Complex _Float16 mulf16(_Complex _Float16 a, _Complex _Float16 b) { // LABEL: define {{.*}} @mulf16( // FULL: call {{.*}} @__mulsc3 // - // FULL_FAST: alloca { half, half } - // FULL_FAST-NEXT: alloca { half, half } - // FULL_FAST-NEXT: alloca { half, half } - // FULL_FAST: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 - // FULL_FAST-NEXT: load half, ptr {{.*}} - // FULL_FAST-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 - // FULL_FAST-NEXT: load half, ptr {{.*}} - // FULL_FAST-NEXT: fpext half {{.*}} to float - // FULL_FAST-NEXT: fpext half {{.*}} to float - // FULL_FAST-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 - // FULL_FAST-NEXT: load half, ptr {{.*}} - // FULL_FAST-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 - // FULL_FAST-NEXT: load half - // FULL_FAST-NEXT: fpext half {{.*}} to float - // FULL_FAST-NEXT: fpext half {{.*}} to float - // FULL_FAST-NEXT: fmul{{.*}}float - // FULL_FAST-NEXT: fmul{{.*}}float - // FULL_FAST-NEXT: fmul{{.*}}float - // FULL_FAST-NEXT: fmul{{.*}}float - // FULL_FAST-NEXT: fsub{{.*}}float - // FULL_FAST-NEXT: fadd{{.*}}float - // FULL_FAST-NEXT: fptrunc float {{.*}} to half - // FULL_FAST-NEXT: fptrunc float {{.*}} to half - + // FULL_FAST: call {{.*}} @__mulsc3 + // // BASIC: alloca { half, half } // BASIC-NEXT: alloca { half, half } // BASIC: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c index 9c5617924feac..dd90d02f7759b 100644 --- a/clang/test/CodeGen/nofpclass.c +++ b/clang/test/CodeGen/nofpclass.c @@ -542,22 +542,35 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple // CFINITEONLY-NEXT: [[MUL_BC:%.*]] = fmul nnan ninf double [[C_IMAG]], [[C_REAL2]] // CFINITEONLY-NEXT: [[MUL_R:%.*]] = fsub nnan ninf 
double [[MUL_AC]], [[MUL_BD]] // CFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf double [[MUL_AD]], [[MUL_BC]] +// CFINITEONLY-NEXT: [[ISNAN_CMP:%.*]] = fcmp nnan ninf uno double [[MUL_R]], [[MUL_R]] +// CFINITEONLY-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] +// CFINITEONLY: complex_mul_imag_nan: +// CFINITEONLY-NEXT: [[ISNAN_CMP5:%.*]] = fcmp nnan ninf uno double [[MUL_I]], [[MUL_I]] +// CFINITEONLY-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// CFINITEONLY: complex_mul_libcall: +// CFINITEONLY-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(nan inf) [[C_REAL]], double noundef nofpclass(nan inf) [[C_IMAG]], double noundef nofpclass(nan inf) [[C_REAL2]], double noundef nofpclass(nan inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] +// CFINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0 +// CFINITEONLY-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1 +// CFINITEONLY-NEXT: br label [[COMPLEX_MUL_CONT]] +// CFINITEONLY: complex_mul_cont: +// CFINITEONLY-NEXT: [[REAL_MUL_PHI:%.*]] = phi nnan ninf double [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP2]], [[COMPLEX_MUL_LIBCALL]] ] +// CFINITEONLY-NEXT: [[IMAG_MUL_PHI:%.*]] = phi nnan ninf double [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP3]], [[COMPLEX_MUL_LIBCALL]] ] // CFINITEONLY-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 // CFINITEONLY-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 -// CFINITEONLY-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 -// CFINITEONLY-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 -// CFINITEONLY-NEXT: [[TMP2:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 -// CFINITEONLY-NEXT: ret { 
double, double } [[TMP2]] +// CFINITEONLY-NEXT: store double [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 8 +// CFINITEONLY-NEXT: store double [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 8 +// CFINITEONLY-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// CFINITEONLY-NEXT: ret { double, double } [[TMP4]] // // CLFINITEONLY: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) // CLFINITEONLY-LABEL: define dso_local { double, double } @defined_complex_func_f64_ret // CLFINITEONLY-SAME: (double noundef nofpclass(nan inf) [[C_COERCE0:%.*]], double noundef nofpclass(nan inf) [[C_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CLFINITEONLY-NEXT: entry: +// CLFINITEONLY-NEXT: [[MUL_AD:%.*]] = fmul nnan ninf double [[C_COERCE0]], [[C_COERCE1]] +// CLFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf double [[MUL_AD]], [[MUL_AD]] // CLFINITEONLY-NEXT: [[MUL_AC:%.*]] = fmul nnan ninf double [[C_COERCE0]], [[C_COERCE0]] // CLFINITEONLY-NEXT: [[MUL_BD:%.*]] = fmul nnan ninf double [[C_COERCE1]], [[C_COERCE1]] -// CLFINITEONLY-NEXT: [[MUL_AD:%.*]] = fmul nnan ninf double [[C_COERCE0]], [[C_COERCE1]] // CLFINITEONLY-NEXT: [[MUL_R:%.*]] = fsub nnan ninf double [[MUL_AC]], [[MUL_BD]] -// CLFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf double [[MUL_AD]], [[MUL_AD]] // CLFINITEONLY-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { double, double } poison, double [[MUL_R]], 0 // CLFINITEONLY-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { double, double } [[DOTFCA_0_INSERT]], double [[MUL_I]], 1 // CLFINITEONLY-NEXT: ret { double, double } [[DOTFCA_1_INSERT]] @@ -586,12 +599,25 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple // NONANS-NEXT: [[MUL_BC:%.*]] = fmul nnan double [[C_IMAG]], [[C_REAL2]] // NONANS-NEXT: [[MUL_R:%.*]] = fsub nnan double [[MUL_AC]], [[MUL_BD]] // NONANS-NEXT: [[MUL_I:%.*]] = fadd nnan double [[MUL_AD]], [[MUL_BC]] +// NONANS-NEXT: [[ISNAN_CMP:%.*]] = fcmp nnan uno 
double [[MUL_R]], [[MUL_R]] +// NONANS-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] +// NONANS: complex_mul_imag_nan: +// NONANS-NEXT: [[ISNAN_CMP5:%.*]] = fcmp nnan uno double [[MUL_I]], [[MUL_I]] +// NONANS-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// NONANS: complex_mul_libcall: +// NONANS-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(nan) [[C_REAL]], double noundef nofpclass(nan) [[C_IMAG]], double noundef nofpclass(nan) [[C_REAL2]], double noundef nofpclass(nan) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] +// NONANS-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0 +// NONANS-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1 +// NONANS-NEXT: br label [[COMPLEX_MUL_CONT]] +// NONANS: complex_mul_cont: +// NONANS-NEXT: [[REAL_MUL_PHI:%.*]] = phi nnan double [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP2]], [[COMPLEX_MUL_LIBCALL]] ] +// NONANS-NEXT: [[IMAG_MUL_PHI:%.*]] = phi nnan double [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP3]], [[COMPLEX_MUL_LIBCALL]] ] // NONANS-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 // NONANS-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 -// NONANS-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 -// NONANS-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 -// NONANS-NEXT: [[TMP2:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 -// NONANS-NEXT: ret { double, double } [[TMP2]] +// NONANS-NEXT: store double [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 8 +// NONANS-NEXT: store double [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 8 +// NONANS-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// NONANS-NEXT: ret { double, 
double } [[TMP4]] // // NOINFS: Function Attrs: noinline nounwind optnone // NOINFS-LABEL: define dso_local { double, double } @defined_complex_func_f64_ret @@ -617,12 +643,25 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple // NOINFS-NEXT: [[MUL_BC:%.*]] = fmul ninf double [[C_IMAG]], [[C_REAL2]] // NOINFS-NEXT: [[MUL_R:%.*]] = fsub ninf double [[MUL_AC]], [[MUL_BD]] // NOINFS-NEXT: [[MUL_I:%.*]] = fadd ninf double [[MUL_AD]], [[MUL_BC]] +// NOINFS-NEXT: [[ISNAN_CMP:%.*]] = fcmp ninf uno double [[MUL_R]], [[MUL_R]] +// NOINFS-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] +// NOINFS: complex_mul_imag_nan: +// NOINFS-NEXT: [[ISNAN_CMP5:%.*]] = fcmp ninf uno double [[MUL_I]], [[MUL_I]] +// NOINFS-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// NOINFS: complex_mul_libcall: +// NOINFS-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(inf) [[C_REAL]], double noundef nofpclass(inf) [[C_IMAG]], double noundef nofpclass(inf) [[C_REAL2]], double noundef nofpclass(inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] +// NOINFS-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0 +// NOINFS-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1 +// NOINFS-NEXT: br label [[COMPLEX_MUL_CONT]] +// NOINFS: complex_mul_cont: +// NOINFS-NEXT: [[REAL_MUL_PHI:%.*]] = phi ninf double [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP2]], [[COMPLEX_MUL_LIBCALL]] ] +// NOINFS-NEXT: [[IMAG_MUL_PHI:%.*]] = phi ninf double [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP3]], [[COMPLEX_MUL_LIBCALL]] ] // NOINFS-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 // NOINFS-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 -// NOINFS-NEXT: 
store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 -// NOINFS-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 -// NOINFS-NEXT: [[TMP2:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 -// NOINFS-NEXT: ret { double, double } [[TMP2]] +// NOINFS-NEXT: store double [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 8 +// NOINFS-NEXT: store double [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 8 +// NOINFS-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// NOINFS-NEXT: ret { double, double } [[TMP4]] // _Complex double defined_complex_func_f64_ret(_Complex double c) { return c * c; @@ -630,10 +669,11 @@ _Complex double defined_complex_func_f64_ret(_Complex double c) { // CFINITEONLY: Function Attrs: noinline nounwind optnone // CFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <2 x half> @defined_complex_func_f16_ret -// CFINITEONLY-SAME: (<2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR5:[0-9]+]] { +// CFINITEONLY-SAME: (<2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR4]] { // CFINITEONLY-NEXT: entry: // CFINITEONLY-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 // CFINITEONLY-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// CFINITEONLY-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 // CFINITEONLY-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 // CFINITEONLY-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 // CFINITEONLY-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 @@ -653,12 +693,28 @@ _Complex double defined_complex_func_f64_ret(_Complex double c) { // CFINITEONLY-NEXT: [[MUL_BC:%.*]] = fmul nnan ninf float [[EXT1]], [[EXT6]] // CFINITEONLY-NEXT: [[MUL_R:%.*]] = fsub nnan ninf float [[MUL_AC]], [[MUL_BD]] // CFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf float [[MUL_AD]], [[MUL_BC]] -// CFINITEONLY-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half -// CFINITEONLY-NEXT: [[UNPROMOTION8:%.*]] = fptrunc 
float [[MUL_I]] to half +// CFINITEONLY-NEXT: [[ISNAN_CMP:%.*]] = fcmp nnan ninf uno float [[MUL_R]], [[MUL_R]] +// CFINITEONLY-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2]] +// CFINITEONLY: complex_mul_imag_nan: +// CFINITEONLY-NEXT: [[ISNAN_CMP8:%.*]] = fcmp nnan ninf uno float [[MUL_I]], [[MUL_I]] +// CFINITEONLY-NEXT: br i1 [[ISNAN_CMP8]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// CFINITEONLY: complex_mul_libcall: +// CFINITEONLY-NEXT: [[CALL:%.*]] = call nnan ninf nofpclass(nan inf) <2 x float> @__mulsc3(float noundef nofpclass(nan inf) [[EXT]], float noundef nofpclass(nan inf) [[EXT1]], float noundef nofpclass(nan inf) [[EXT6]], float noundef nofpclass(nan inf) [[EXT7]]) #[[ATTR7]] +// CFINITEONLY-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// CFINITEONLY-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// CFINITEONLY-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// CFINITEONLY-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// CFINITEONLY-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// CFINITEONLY-NEXT: br label [[COMPLEX_MUL_CONT]] +// CFINITEONLY: complex_mul_cont: +// CFINITEONLY-NEXT: [[REAL_MUL_PHI:%.*]] = phi nnan ninf float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] +// CFINITEONLY-NEXT: [[IMAG_MUL_PHI:%.*]] = phi nnan ninf float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] +// CFINITEONLY-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half +// CFINITEONLY-NEXT: [[UNPROMOTION9:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half // CFINITEONLY-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, 
i32 0 // CFINITEONLY-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 // CFINITEONLY-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 -// CFINITEONLY-NEXT: store half [[UNPROMOTION8]], ptr [[RETVAL_IMAGP]], align 2 +// CFINITEONLY-NEXT: store half [[UNPROMOTION9]], ptr [[RETVAL_IMAGP]], align 2 // CFINITEONLY-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 // CFINITEONLY-NEXT: ret <2 x half> [[TMP0]] // @@ -667,26 +723,27 @@ _Complex double defined_complex_func_f64_ret(_Complex double c) { // CLFINITEONLY-SAME: (<2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { // CLFINITEONLY-NEXT: entry: // CLFINITEONLY-NEXT: [[C_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x half> [[C_COERCE]], i64 0 -// CLFINITEONLY-NEXT: [[C_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <2 x half> [[C_COERCE]], i64 1 // CLFINITEONLY-NEXT: [[EXT:%.*]] = fpext half [[C_SROA_0_0_VEC_EXTRACT]] to float +// CLFINITEONLY-NEXT: [[C_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <2 x half> [[C_COERCE]], i64 1 // CLFINITEONLY-NEXT: [[EXT1:%.*]] = fpext half [[C_SROA_0_2_VEC_EXTRACT]] to float +// CLFINITEONLY-NEXT: [[MUL_AD:%.*]] = fmul nnan ninf float [[EXT]], [[EXT1]] +// CLFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf float [[MUL_AD]], [[MUL_AD]] // CLFINITEONLY-NEXT: [[MUL_AC:%.*]] = fmul nnan ninf float [[EXT]], [[EXT]] // CLFINITEONLY-NEXT: [[MUL_BD:%.*]] = fmul nnan ninf float [[EXT1]], [[EXT1]] -// CLFINITEONLY-NEXT: [[MUL_AD:%.*]] = fmul nnan ninf float [[EXT]], [[EXT1]] // CLFINITEONLY-NEXT: [[MUL_R:%.*]] = fsub nnan ninf float [[MUL_AC]], [[MUL_BD]] -// CLFINITEONLY-NEXT: [[MUL_I:%.*]] = fadd nnan ninf float [[MUL_AD]], [[MUL_AD]] // CLFINITEONLY-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half -// CLFINITEONLY-NEXT: [[UNPROMOTION8:%.*]] = fptrunc float [[MUL_I]] to half +// CLFINITEONLY-NEXT: [[UNPROMOTION9:%.*]] = fptrunc float [[MUL_I]] to half // 
CLFINITEONLY-NEXT: [[RETVAL_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x half> poison, half [[UNPROMOTION]], i64 0 -// CLFINITEONLY-NEXT: [[RETVAL_SROA_0_2_VEC_INSERT:%.*]] = insertelement <2 x half> [[RETVAL_SROA_0_0_VEC_INSERT]], half [[UNPROMOTION8]], i64 1 +// CLFINITEONLY-NEXT: [[RETVAL_SROA_0_2_VEC_INSERT:%.*]] = insertelement <2 x half> [[RETVAL_SROA_0_0_VEC_INSERT]], half [[UNPROMOTION9]], i64 1 // CLFINITEONLY-NEXT: ret <2 x half> [[RETVAL_SROA_0_2_VEC_INSERT]] // // NONANS: Function Attrs: noinline nounwind optnone // NONANS-LABEL: define dso_local nofpclass(nan) <2 x half> @defined_complex_func_f16_ret -// NONANS-SAME: (<2 x half> noundef nofpclass(nan) [[C_COERCE:%.*]]) #[[ATTR5:[0-9]+]] { +// NONANS-SAME: (<2 x half> noundef nofpclass(nan) [[C_COERCE:%.*]]) #[[ATTR4]] { // NONANS-NEXT: entry: // NONANS-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 // NONANS-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// NONANS-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 // NONANS-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 // NONANS-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 // NONANS-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 @@ -706,21 +763,38 @@ _Complex double defined_complex_func_f64_ret(_Complex double c) { // NONANS-NEXT: [[MUL_BC:%.*]] = fmul nnan float [[EXT1]], [[EXT6]] // NONANS-NEXT: [[MUL_R:%.*]] = fsub nnan float [[MUL_AC]], [[MUL_BD]] // NONANS-NEXT: [[MUL_I:%.*]] = fadd nnan float [[MUL_AD]], [[MUL_BC]] -// NONANS-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half -// NONANS-NEXT: [[UNPROMOTION8:%.*]] = fptrunc float [[MUL_I]] to half +// NONANS-NEXT: [[ISNAN_CMP:%.*]] = fcmp nnan uno float [[MUL_R]], [[MUL_R]] +// NONANS-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2]] +// NONANS: complex_mul_imag_nan: +// NONANS-NEXT: [[ISNAN_CMP8:%.*]] = fcmp nnan uno float [[MUL_I]], [[MUL_I]] +// 
NONANS-NEXT: br i1 [[ISNAN_CMP8]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// NONANS: complex_mul_libcall: +// NONANS-NEXT: [[CALL:%.*]] = call nnan nofpclass(nan) <2 x float> @__mulsc3(float noundef nofpclass(nan) [[EXT]], float noundef nofpclass(nan) [[EXT1]], float noundef nofpclass(nan) [[EXT6]], float noundef nofpclass(nan) [[EXT7]]) #[[ATTR7]] +// NONANS-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// NONANS-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// NONANS-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// NONANS-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// NONANS-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// NONANS-NEXT: br label [[COMPLEX_MUL_CONT]] +// NONANS: complex_mul_cont: +// NONANS-NEXT: [[REAL_MUL_PHI:%.*]] = phi nnan float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] +// NONANS-NEXT: [[IMAG_MUL_PHI:%.*]] = phi nnan float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] +// NONANS-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half +// NONANS-NEXT: [[UNPROMOTION9:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half // NONANS-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 // NONANS-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 // NONANS-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 -// NONANS-NEXT: store half [[UNPROMOTION8]], ptr [[RETVAL_IMAGP]], align 2 +// NONANS-NEXT: store half [[UNPROMOTION9]], ptr [[RETVAL_IMAGP]], align 2 // NONANS-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 // NONANS-NEXT: ret <2 x half> [[TMP0]] // // NOINFS: Function Attrs: 
noinline nounwind optnone // NOINFS-LABEL: define dso_local nofpclass(inf) <2 x half> @defined_complex_func_f16_ret -// NOINFS-SAME: (<2 x half> noundef nofpclass(inf) [[C_COERCE:%.*]]) #[[ATTR5:[0-9]+]] { +// NOINFS-SAME: (<2 x half> noundef nofpclass(inf) [[C_COERCE:%.*]]) #[[ATTR4]] { // NOINFS-NEXT: entry: // NOINFS-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 // NOINFS-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// NOINFS-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 // NOINFS-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 // NOINFS-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 // NOINFS-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 @@ -740,12 +814,28 @@ _Complex double defined_complex_func_f64_ret(_Complex double c) { // NOINFS-NEXT: [[MUL_BC:%.*]] = fmul ninf float [[EXT1]], [[EXT6]] // NOINFS-NEXT: [[MUL_R:%.*]] = fsub ninf float [[MUL_AC]], [[MUL_BD]] // NOINFS-NEXT: [[MUL_I:%.*]] = fadd ninf float [[MUL_AD]], [[MUL_BC]] -// NOINFS-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half -// NOINFS-NEXT: [[UNPROMOTION8:%.*]] = fptrunc float [[MUL_I]] to half +// NOINFS-NEXT: [[ISNAN_CMP:%.*]] = fcmp ninf uno float [[MUL_R]], [[MUL_R]] +// NOINFS-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2]] +// NOINFS: complex_mul_imag_nan: +// NOINFS-NEXT: [[ISNAN_CMP8:%.*]] = fcmp ninf uno float [[MUL_I]], [[MUL_I]] +// NOINFS-NEXT: br i1 [[ISNAN_CMP8]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// NOINFS: complex_mul_libcall: +// NOINFS-NEXT: [[CALL:%.*]] = call ninf nofpclass(inf) <2 x float> @__mulsc3(float noundef nofpclass(inf) [[EXT]], float noundef nofpclass(inf) [[EXT1]], float noundef nofpclass(inf) [[EXT6]], float noundef nofpclass(inf) [[EXT7]]) #[[ATTR7]] +// NOINFS-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// NOINFS-NEXT: [[COERCE_REALP:%.*]] = 
getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// NOINFS-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// NOINFS-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// NOINFS-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// NOINFS-NEXT: br label [[COMPLEX_MUL_CONT]] +// NOINFS: complex_mul_cont: +// NOINFS-NEXT: [[REAL_MUL_PHI:%.*]] = phi ninf float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] +// NOINFS-NEXT: [[IMAG_MUL_PHI:%.*]] = phi ninf float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] +// NOINFS-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half +// NOINFS-NEXT: [[UNPROMOTION9:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half // NOINFS-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 // NOINFS-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 // NOINFS-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 -// NOINFS-NEXT: store half [[UNPROMOTION8]], ptr [[RETVAL_IMAGP]], align 2 +// NOINFS-NEXT: store half [[UNPROMOTION9]], ptr [[RETVAL_IMAGP]], align 2 // NOINFS-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 // NOINFS-NEXT: ret <2 x half> [[TMP0]] // @@ -1263,7 +1353,7 @@ extern __m256d extern_m256d(__m256d, ...); // CFINITEONLY: Function Attrs: noinline nounwind optnone // CFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <4 x double> @call_m256d -// CFINITEONLY-SAME: (<4 x double> noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR6:[0-9]+]] { +// CFINITEONLY-SAME: (<4 x double> noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR5:[0-9]+]] { // CFINITEONLY-NEXT: entry: // CFINITEONLY-NEXT: [[X_ADDR:%.*]] = alloca <4 x double>, align 32 // CFINITEONLY-NEXT: store <4 x double> 
[[X]], ptr [[X_ADDR]], align 32 @@ -1281,7 +1371,7 @@ extern __m256d extern_m256d(__m256d, ...); // // NONANS: Function Attrs: noinline nounwind optnone // NONANS-LABEL: define dso_local nofpclass(nan) <4 x double> @call_m256d -// NONANS-SAME: (<4 x double> noundef nofpclass(nan) [[X:%.*]]) #[[ATTR6:[0-9]+]] { +// NONANS-SAME: (<4 x double> noundef nofpclass(nan) [[X:%.*]]) #[[ATTR5:[0-9]+]] { // NONANS-NEXT: entry: // NONANS-NEXT: [[X_ADDR:%.*]] = alloca <4 x double>, align 32 // NONANS-NEXT: store <4 x double> [[X]], ptr [[X_ADDR]], align 32 @@ -1292,7 +1382,7 @@ extern __m256d extern_m256d(__m256d, ...); // // NOINFS: Function Attrs: noinline nounwind optnone // NOINFS-LABEL: define dso_local nofpclass(inf) <4 x double> @call_m256d -// NOINFS-SAME: (<4 x double> noundef nofpclass(inf) [[X:%.*]]) #[[ATTR6:[0-9]+]] { +// NOINFS-SAME: (<4 x double> noundef nofpclass(inf) [[X:%.*]]) #[[ATTR5:[0-9]+]] { // NOINFS-NEXT: entry: // NOINFS-NEXT: [[X_ADDR:%.*]] = alloca <4 x double>, align 32 // NOINFS-NEXT: store <4 x double> [[X]], ptr [[X_ADDR]], align 32 @@ -1307,7 +1397,7 @@ __m256d call_m256d(__m256d x) { // CFINITEONLY: Function Attrs: noinline nounwind optnone // CFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <25 x double> @call_matrix -// CFINITEONLY-SAME: (<25 x double> noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR7:[0-9]+]] { +// CFINITEONLY-SAME: (<25 x double> noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR6:[0-9]+]] { // CFINITEONLY-NEXT: entry: // CFINITEONLY-NEXT: [[X_ADDR:%.*]] = alloca [25 x double], align 8 // CFINITEONLY-NEXT: store <25 x double> [[X]], ptr [[X_ADDR]], align 8 @@ -1324,7 +1414,7 @@ __m256d call_m256d(__m256d x) { // // NONANS: Function Attrs: noinline nounwind optnone // NONANS-LABEL: define dso_local nofpclass(nan) <25 x double> @call_matrix -// NONANS-SAME: (<25 x double> noundef nofpclass(nan) [[X:%.*]]) #[[ATTR7:[0-9]+]] { +// NONANS-SAME: (<25 x double> noundef nofpclass(nan) [[X:%.*]]) #[[ATTR6:[0-9]+]] { // NONANS-NEXT: 
entry: // NONANS-NEXT: [[X_ADDR:%.*]] = alloca [25 x double], align 8 // NONANS-NEXT: store <25 x double> [[X]], ptr [[X_ADDR]], align 8 @@ -1334,7 +1424,7 @@ __m256d call_m256d(__m256d x) { // // NOINFS: Function Attrs: noinline nounwind optnone // NOINFS-LABEL: define dso_local nofpclass(inf) <25 x double> @call_matrix -// NOINFS-SAME: (<25 x double> noundef nofpclass(inf) [[X:%.*]]) #[[ATTR7:[0-9]+]] { +// NOINFS-SAME: (<25 x double> noundef nofpclass(inf) [[X:%.*]]) #[[ATTR6:[0-9]+]] { // NOINFS-NEXT: entry: // NOINFS-NEXT: [[X_ADDR:%.*]] = alloca [25 x double], align 8 // NOINFS-NEXT: store <25 x double> [[X]], ptr [[X_ADDR]], align 8 diff --git a/clang/test/Driver/range.c b/clang/test/Driver/range.c index 7a99c3ce24862..da5748d7c723c 100644 --- a/clang/test/Driver/range.c +++ b/clang/test/Driver/range.c @@ -67,6 +67,9 @@ // RUN: %clang -### -target x86_64 -ffast-math -fno-cx-limited-range \ // RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s +// RUN: not %clang -### -target x86_64 -fcomplex-arithmetic=foo -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=ERR %s + // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=BASIC %s @@ -85,7 +88,7 @@ // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ // RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN5 %s +// RUN: | FileCheck --check-prefix=IMPRVD %s // RUN: %clang -### -target x86_64 -fcx-limited-range \ // RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ @@ -97,28 +100,28 @@ // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ // RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN8 %s +// RUN: | FileCheck --check-prefix=FULL %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=basic \ // RUN: -fcomplex-arithmetic=promoted -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN9 %s +// RUN: | FileCheck --check-prefix=PRMTD %s // RUN: %clang -### -target x86_64 
-fcomplex-arithmetic=improved \ // RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN10 %s +// RUN: | FileCheck --check-prefix=BASIC %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=improved \ // RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN11 %s +// RUN: | FileCheck --check-prefix=FULL %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=improved \ // RUN: -fcomplex-arithmetic=promoted -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN12 %s +// RUN: | FileCheck --check-prefix=PRMTD %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ // RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN13 %s +// RUN: | FileCheck --check-prefix=BASIC %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ // RUN: -fcx-limited-range -c %s 2>&1 \ @@ -126,26 +129,26 @@ // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ // RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN15 %s +// RUN: | FileCheck --check-prefix=IMPRVD %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=promoted \ // RUN: -fcomplex-arithmetic=full -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN16 %s +// RUN: | FileCheck --check-prefix=FULL %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ // RUN: -fcomplex-arithmetic=basic -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN17 %s +// RUN: | FileCheck --check-prefix=BASIC %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ // RUN: -ffast-math -c %s 2>&1 | FileCheck --check-prefix=WARN17 %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ // RUN: -fcomplex-arithmetic=improved -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN18 %s +// RUN: | FileCheck --check-prefix=IMPRVD %s // RUN: %clang -### -target x86_64 -fcomplex-arithmetic=full \ // RUN: -fcomplex-arithmetic=promoted -c %s 2>&1 \ -// RUN: | 
FileCheck --check-prefix=WARN19 %s +// RUN: | FileCheck --check-prefix=PRMTD %s // RUN: %clang -### -target x86_64 -ffast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=BASIC %s @@ -190,16 +193,8 @@ // WARN5: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] // WARN6: warning: overriding '-fcx-limited-range' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] // WARN7: warning: overriding '-fcx-fortran-rules' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] -// WARN8: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=full' [-Woverriding-option] -// WARN9: warning: overriding '-fcomplex-arithmetic=basic' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] -// WARN10: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] -// WARN11: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=full' [-Woverriding-option] -// WARN12: warning: overriding '-fcomplex-arithmetic=improved' option with '-fcomplex-arithmetic=promoted' [-Woverriding-option] -// WARN13: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] // WARN14: overriding '-complex-range=promoted' option with '-fcx-limited-range' [-Woverriding-option] -// WARN15: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] -// WARN16: warning: overriding '-fcomplex-arithmetic=promoted' option with '-fcomplex-arithmetic=full' [-Woverriding-option] // WARN17: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=basic' [-Woverriding-option] -// WARN18: warning: overriding '-fcomplex-arithmetic=full' option with '-fcomplex-arithmetic=improved' [-Woverriding-option] -// WARN19: warning: overriding '-fcomplex-arithmetic=full' option with 
'-fcomplex-arithmetic=promoted' [-Woverriding-option] // WARN20: warning: overriding '-fcx-fortran-rules' option with '-fcx-limited-range' [-Woverriding-option] + +// ERR: error: unsupported argument 'foo' to option '-fcomplex-arithmetic=' From a558d3160a208e790306f83dd01738e63ebb6b93 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Mon, 18 Mar 2024 05:41:37 -0700 Subject: [PATCH 17/18] Added LIT tests. --- clang/test/CodeGen/X86/cx-complex-range.c | 1425 ++++++++ clang/test/CodeGen/cx-complex-range.c | 3966 ++++++++++++++++++--- 2 files changed, 4818 insertions(+), 573 deletions(-) create mode 100644 clang/test/CodeGen/X86/cx-complex-range.c diff --git a/clang/test/CodeGen/X86/cx-complex-range.c b/clang/test/CodeGen/X86/cx-complex-range.c new file mode 100644 index 0000000000000..fa46576266a20 --- /dev/null +++ b/clang/test/CodeGen/X86/cx-complex-range.c @@ -0,0 +1,1425 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -o - | FileCheck %s --check-prefix=FULL + +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -complex-range=basic -o - | FileCheck %s --check-prefix=BASIC + +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -fno-cx-limited-range -o - | FileCheck %s --check-prefix=FULL + +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -complex-range=improved -o - | FileCheck %s --check-prefix=IMPRVD + +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -complex-range=promoted -o - | FileCheck %s --check-prefix=PRMTD + +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -complex-range=full -o - | FileCheck %s --check-prefix=FULL + +// RUN: %clang_cc1 -triple x86_64-windows-pc -complex-range=promoted \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=X86WINPRMTD + +// Fast 
math +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -ffast-math -complex-range=basic -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=BASIC_FAST + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -ffast-math -complex-range=full -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=FULL_FAST + +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ +// RUN: -fno-cx-fortran-rules -o - | FileCheck %s --check-prefix=FULL + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -ffast-math -complex-range=improved -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=IMPRVD_FAST + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -ffast-math -complex-range=promoted -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=PRMTD_FAST + +// FULL-LABEL: define dso_local <2 x half> @divf16( +// FULL-SAME: <2 x half> noundef [[A_COERCE:%.*]], <2 x half> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// FULL-NEXT: entry: +// FULL-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// FULL-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// FULL-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// FULL-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// FULL-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// FULL-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// FULL-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// FULL-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// FULL-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// FULL-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], 
align 2 +// FULL-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// FULL-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// FULL-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// FULL-NEXT: [[CALL:%.*]] = call <2 x float> @__divsc3(float noundef [[EXT]], float noundef [[EXT1]], float noundef [[EXT2]], float noundef [[EXT3]]) #[[ATTR1:[0-9]+]] +// FULL-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// FULL-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[COERCE_REAL]] to half +// FULL-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[COERCE_IMAG]] to half +// FULL-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// FULL-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// FULL-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// FULL-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// FULL-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// FULL-NEXT: ret <2 x half> [[TMP0]] +// +// BASIC-LABEL: define dso_local <2 x half> @divf16( +// BASIC-SAME: <2 x half> noundef [[A_COERCE:%.*]], <2 x half> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// BASIC-NEXT: entry: +// BASIC-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// BASIC-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// BASIC-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// BASIC-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// BASIC-NEXT: store <2 x half> 
[[B_COERCE]], ptr [[B]], align 2 +// BASIC-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// BASIC-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// BASIC-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// BASIC-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// BASIC-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// BASIC-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// BASIC-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// BASIC-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// BASIC-NEXT: [[TMP0:%.*]] = fmul float [[EXT]], [[EXT2]] +// BASIC-NEXT: [[TMP1:%.*]] = fmul float [[EXT1]], [[EXT3]] +// BASIC-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]] +// BASIC-NEXT: [[TMP3:%.*]] = fmul float [[EXT2]], [[EXT2]] +// BASIC-NEXT: [[TMP4:%.*]] = fmul float [[EXT3]], [[EXT3]] +// BASIC-NEXT: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]] +// BASIC-NEXT: [[TMP6:%.*]] = fmul float [[EXT1]], [[EXT2]] +// BASIC-NEXT: [[TMP7:%.*]] = fmul float [[EXT]], [[EXT3]] +// BASIC-NEXT: [[TMP8:%.*]] = fsub float [[TMP6]], [[TMP7]] +// BASIC-NEXT: [[TMP9:%.*]] = fdiv float [[TMP2]], [[TMP5]] +// BASIC-NEXT: [[TMP10:%.*]] = fdiv float [[TMP8]], [[TMP5]] +// BASIC-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP9]] to half +// BASIC-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[TMP10]] to half +// BASIC-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC-NEXT: store half 
[[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// BASIC-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// BASIC-NEXT: [[TMP11:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// BASIC-NEXT: ret <2 x half> [[TMP11]] +// +// IMPRVD-LABEL: define dso_local <2 x half> @divf16( +// IMPRVD-SAME: <2 x half> noundef [[A_COERCE:%.*]], <2 x half> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// IMPRVD-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// IMPRVD-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// IMPRVD-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// IMPRVD-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// IMPRVD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// IMPRVD-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// IMPRVD-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// IMPRVD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// IMPRVD-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// IMPRVD-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// IMPRVD-NEXT: [[TMP0:%.*]] = call float @llvm.fabs.f32(float [[EXT2]]) +// IMPRVD-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[EXT3]]) +// IMPRVD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt float [[TMP0]], [[TMP1]] +// IMPRVD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label 
[[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD-NEXT: [[TMP2:%.*]] = fdiv float [[EXT3]], [[EXT2]] +// IMPRVD-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], [[EXT3]] +// IMPRVD-NEXT: [[TMP4:%.*]] = fadd float [[EXT2]], [[TMP3]] +// IMPRVD-NEXT: [[TMP5:%.*]] = fmul float [[EXT1]], [[TMP2]] +// IMPRVD-NEXT: [[TMP6:%.*]] = fadd float [[EXT]], [[TMP5]] +// IMPRVD-NEXT: [[TMP7:%.*]] = fdiv float [[TMP6]], [[TMP4]] +// IMPRVD-NEXT: [[TMP8:%.*]] = fmul float [[EXT]], [[TMP2]] +// IMPRVD-NEXT: [[TMP9:%.*]] = fsub float [[EXT1]], [[TMP8]] +// IMPRVD-NEXT: [[TMP10:%.*]] = fdiv float [[TMP9]], [[TMP4]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD: abs_rhsr_less_than_abs_rhsi: +// IMPRVD-NEXT: [[TMP11:%.*]] = fdiv float [[EXT2]], [[EXT3]] +// IMPRVD-NEXT: [[TMP12:%.*]] = fmul float [[TMP11]], [[EXT2]] +// IMPRVD-NEXT: [[TMP13:%.*]] = fadd float [[EXT3]], [[TMP12]] +// IMPRVD-NEXT: [[TMP14:%.*]] = fmul float [[EXT]], [[TMP11]] +// IMPRVD-NEXT: [[TMP15:%.*]] = fadd float [[TMP14]], [[EXT1]] +// IMPRVD-NEXT: [[TMP16:%.*]] = fdiv float [[TMP15]], [[TMP13]] +// IMPRVD-NEXT: [[TMP17:%.*]] = fmul float [[EXT1]], [[TMP11]] +// IMPRVD-NEXT: [[TMP18:%.*]] = fsub float [[TMP17]], [[EXT]] +// IMPRVD-NEXT: [[TMP19:%.*]] = fdiv float [[TMP18]], [[TMP13]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD: complex_div: +// IMPRVD-NEXT: [[TMP20:%.*]] = phi float [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[TMP21:%.*]] = phi float [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP20]] to half +// IMPRVD-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[TMP21]] to half +// IMPRVD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr 
[[RETVAL]], i32 0, i32 1 +// IMPRVD-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// IMPRVD-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// IMPRVD-NEXT: [[TMP22:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// IMPRVD-NEXT: ret <2 x half> [[TMP22]] +// +// PRMTD-LABEL: define dso_local <2 x half> @divf16( +// PRMTD-SAME: <2 x half> noundef [[A_COERCE:%.*]], <2 x half> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// PRMTD-NEXT: entry: +// PRMTD-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// PRMTD-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// PRMTD-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// PRMTD-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// PRMTD-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// PRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// PRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// PRMTD-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// PRMTD-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// PRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// PRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// PRMTD-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// PRMTD-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// PRMTD-NEXT: [[TMP0:%.*]] = fmul float [[EXT]], [[EXT2]] +// PRMTD-NEXT: [[TMP1:%.*]] = fmul float [[EXT1]], [[EXT3]] +// PRMTD-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]] +// PRMTD-NEXT: [[TMP3:%.*]] = fmul float [[EXT2]], [[EXT2]] +// PRMTD-NEXT: [[TMP4:%.*]] = fmul float 
[[EXT3]], [[EXT3]] +// PRMTD-NEXT: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]] +// PRMTD-NEXT: [[TMP6:%.*]] = fmul float [[EXT1]], [[EXT2]] +// PRMTD-NEXT: [[TMP7:%.*]] = fmul float [[EXT]], [[EXT3]] +// PRMTD-NEXT: [[TMP8:%.*]] = fsub float [[TMP6]], [[TMP7]] +// PRMTD-NEXT: [[TMP9:%.*]] = fdiv float [[TMP2]], [[TMP5]] +// PRMTD-NEXT: [[TMP10:%.*]] = fdiv float [[TMP8]], [[TMP5]] +// PRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP9]] to half +// PRMTD-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[TMP10]] to half +// PRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// PRMTD-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// PRMTD-NEXT: [[TMP11:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// PRMTD-NEXT: ret <2 x half> [[TMP11]] +// +// X86WINPRMTD-LABEL: define dso_local i32 @divf16( +// X86WINPRMTD-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// X86WINPRMTD-NEXT: entry: +// X86WINPRMTD-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86WINPRMTD-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86WINPRMTD-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// X86WINPRMTD-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 2 +// X86WINPRMTD-NEXT: store i32 [[B_COERCE]], ptr [[B]], align 2 +// X86WINPRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86WINPRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86WINPRMTD-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86WINPRMTD-NEXT: [[EXT1:%.*]] = fpext half 
[[A_IMAG]] to float +// X86WINPRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// X86WINPRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// X86WINPRMTD-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// X86WINPRMTD-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// X86WINPRMTD-NEXT: [[TMP0:%.*]] = fmul float [[EXT]], [[EXT2]] +// X86WINPRMTD-NEXT: [[TMP1:%.*]] = fmul float [[EXT1]], [[EXT3]] +// X86WINPRMTD-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]] +// X86WINPRMTD-NEXT: [[TMP3:%.*]] = fmul float [[EXT2]], [[EXT2]] +// X86WINPRMTD-NEXT: [[TMP4:%.*]] = fmul float [[EXT3]], [[EXT3]] +// X86WINPRMTD-NEXT: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]] +// X86WINPRMTD-NEXT: [[TMP6:%.*]] = fmul float [[EXT1]], [[EXT2]] +// X86WINPRMTD-NEXT: [[TMP7:%.*]] = fmul float [[EXT]], [[EXT3]] +// X86WINPRMTD-NEXT: [[TMP8:%.*]] = fsub float [[TMP6]], [[TMP7]] +// X86WINPRMTD-NEXT: [[TMP9:%.*]] = fdiv float [[TMP2]], [[TMP5]] +// X86WINPRMTD-NEXT: [[TMP10:%.*]] = fdiv float [[TMP8]], [[TMP5]] +// X86WINPRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP9]] to half +// X86WINPRMTD-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[TMP10]] to half +// X86WINPRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86WINPRMTD-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// X86WINPRMTD-NEXT: [[TMP11:%.*]] = load i32, ptr [[RETVAL]], align 2 +// X86WINPRMTD-NEXT: ret i32 [[TMP11]] +// +// BASIC_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @divf16( +// 
BASIC_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x half> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// BASIC_FAST-NEXT: entry: +// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// BASIC_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// BASIC_FAST-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// BASIC_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// BASIC_FAST-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// BASIC_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// BASIC_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// BASIC_FAST-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// BASIC_FAST-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// BASIC_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// BASIC_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// BASIC_FAST-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// BASIC_FAST-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// BASIC_FAST-NEXT: [[TMP0:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT2]] +// BASIC_FAST-NEXT: [[TMP1:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT3]] +// BASIC_FAST-NEXT: [[TMP2:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP0]], [[TMP1]] +// BASIC_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT2]], [[EXT2]] +// BASIC_FAST-NEXT: [[TMP4:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT3]], [[EXT3]] +// BASIC_FAST-NEXT: 
[[TMP5:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP3]], [[TMP4]] +// BASIC_FAST-NEXT: [[TMP6:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT2]] +// BASIC_FAST-NEXT: [[TMP7:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT3]] +// BASIC_FAST-NEXT: [[TMP8:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[TMP6]], [[TMP7]] +// BASIC_FAST-NEXT: [[TMP9:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP2]], [[TMP5]] +// BASIC_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP8]], [[TMP5]] +// BASIC_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP9]] to half +// BASIC_FAST-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[TMP10]] to half +// BASIC_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// BASIC_FAST-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// BASIC_FAST-NEXT: [[TMP11:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// BASIC_FAST-NEXT: ret <2 x half> [[TMP11]] +// +// FULL_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @divf16( +// FULL_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x half> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// FULL_FAST-NEXT: entry: +// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// FULL_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// FULL_FAST-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// FULL_FAST-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// FULL_FAST-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// FULL_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// 
FULL_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// FULL_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// FULL_FAST-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// FULL_FAST-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// FULL_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// FULL_FAST-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// FULL_FAST-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) <2 x float> @__divsc3(float noundef nofpclass(nan inf) [[EXT]], float noundef nofpclass(nan inf) [[EXT1]], float noundef nofpclass(nan inf) [[EXT2]], float noundef nofpclass(nan inf) [[EXT3]]) #[[ATTR1:[0-9]+]] +// FULL_FAST-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// FULL_FAST-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL_FAST-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL_FAST-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL_FAST-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[COERCE_REAL]] to half +// FULL_FAST-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[COERCE_IMAG]] to half +// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// FULL_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, 
i32 1 +// FULL_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// FULL_FAST-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// FULL_FAST-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// FULL_FAST-NEXT: ret <2 x half> [[TMP0]] +// +// IMPRVD_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @divf16( +// IMPRVD_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x half> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// IMPRVD_FAST-NEXT: entry: +// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// IMPRVD_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// IMPRVD_FAST-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// IMPRVD_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// IMPRVD_FAST-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// IMPRVD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// IMPRVD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// IMPRVD_FAST-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// IMPRVD_FAST-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// IMPRVD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// IMPRVD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// IMPRVD_FAST-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// IMPRVD_FAST-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float [[EXT2]]) +// 
IMPRVD_FAST-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float [[EXT3]]) +// IMPRVD_FAST-NEXT: [[ABS_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt float [[TMP0]], [[TMP1]] +// IMPRVD_FAST-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD_FAST: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP2:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[EXT3]], [[EXT2]] +// IMPRVD_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[TMP2]], [[EXT3]] +// IMPRVD_FAST-NEXT: [[TMP4:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[EXT2]], [[TMP3]] +// IMPRVD_FAST-NEXT: [[TMP5:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[TMP2]] +// IMPRVD_FAST-NEXT: [[TMP6:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[EXT]], [[TMP5]] +// IMPRVD_FAST-NEXT: [[TMP7:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP6]], [[TMP4]] +// IMPRVD_FAST-NEXT: [[TMP8:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[TMP2]] +// IMPRVD_FAST-NEXT: [[TMP9:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[EXT1]], [[TMP8]] +// IMPRVD_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP9]], [[TMP4]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD_FAST: abs_rhsr_less_than_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP11:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[EXT2]], [[EXT3]] +// IMPRVD_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[TMP11]], [[EXT2]] +// IMPRVD_FAST-NEXT: [[TMP13:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[EXT3]], [[TMP12]] +// IMPRVD_FAST-NEXT: [[TMP14:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[TMP11]] +// IMPRVD_FAST-NEXT: [[TMP15:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP14]], [[EXT1]] +// IMPRVD_FAST-NEXT: [[TMP16:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP15]], [[TMP13]] +// IMPRVD_FAST-NEXT: 
[[TMP17:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[TMP11]] +// IMPRVD_FAST-NEXT: [[TMP18:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[TMP17]], [[EXT]] +// IMPRVD_FAST-NEXT: [[TMP19:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP18]], [[TMP13]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD_FAST: complex_div: +// IMPRVD_FAST-NEXT: [[TMP20:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[TMP21:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP20]] to half +// IMPRVD_FAST-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[TMP21]] to half +// IMPRVD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// IMPRVD_FAST-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// IMPRVD_FAST-NEXT: [[TMP22:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// IMPRVD_FAST-NEXT: ret <2 x half> [[TMP22]] +// +// PRMTD_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @divf16( +// PRMTD_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x half> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// PRMTD_FAST-NEXT: entry: +// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// PRMTD_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// PRMTD_FAST-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// PRMTD_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// PRMTD_FAST-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// 
PRMTD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// PRMTD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// PRMTD_FAST-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// PRMTD_FAST-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// PRMTD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// PRMTD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// PRMTD_FAST-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// PRMTD_FAST-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// PRMTD_FAST-NEXT: [[TMP0:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT2]] +// PRMTD_FAST-NEXT: [[TMP1:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT3]] +// PRMTD_FAST-NEXT: [[TMP2:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP0]], [[TMP1]] +// PRMTD_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT2]], [[EXT2]] +// PRMTD_FAST-NEXT: [[TMP4:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT3]], [[EXT3]] +// PRMTD_FAST-NEXT: [[TMP5:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP3]], [[TMP4]] +// PRMTD_FAST-NEXT: [[TMP6:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT2]] +// PRMTD_FAST-NEXT: [[TMP7:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT3]] +// PRMTD_FAST-NEXT: [[TMP8:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[TMP6]], [[TMP7]] +// PRMTD_FAST-NEXT: [[TMP9:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP2]], [[TMP5]] +// PRMTD_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz 
arcp afn float [[TMP8]], [[TMP5]] +// PRMTD_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP9]] to half +// PRMTD_FAST-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[TMP10]] to half +// PRMTD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// PRMTD_FAST-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// PRMTD_FAST-NEXT: [[TMP11:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// PRMTD_FAST-NEXT: ret <2 x half> [[TMP11]] +// +_Complex _Float16 divf16(_Complex _Float16 a, _Complex _Float16 b) { + return a / b; +} + +// FULL-LABEL: define dso_local <2 x half> @mulf16( +// FULL-SAME: <2 x half> noundef [[A_COERCE:%.*]], <2 x half> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// FULL-NEXT: entry: +// FULL-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// FULL-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// FULL-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// FULL-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// FULL-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// FULL-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// FULL-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// FULL-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// FULL-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// FULL-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// FULL-NEXT: [[B_IMAGP:%.*]] = getelementptr 
inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// FULL-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// FULL-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// FULL-NEXT: [[MUL_AC:%.*]] = fmul float [[EXT]], [[EXT2]] +// FULL-NEXT: [[MUL_BD:%.*]] = fmul float [[EXT1]], [[EXT3]] +// FULL-NEXT: [[MUL_AD:%.*]] = fmul float [[EXT]], [[EXT3]] +// FULL-NEXT: [[MUL_BC:%.*]] = fmul float [[EXT1]], [[EXT2]] +// FULL-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// FULL-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// FULL-NEXT: [[ISNAN_CMP:%.*]] = fcmp uno float [[MUL_R]], [[MUL_R]] +// FULL-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] +// FULL: complex_mul_imag_nan: +// FULL-NEXT: [[ISNAN_CMP4:%.*]] = fcmp uno float [[MUL_I]], [[MUL_I]] +// FULL-NEXT: br i1 [[ISNAN_CMP4]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// FULL: complex_mul_libcall: +// FULL-NEXT: [[CALL:%.*]] = call <2 x float> @__mulsc3(float noundef [[EXT]], float noundef [[EXT1]], float noundef [[EXT2]], float noundef [[EXT3]]) #[[ATTR1]] +// FULL-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// FULL-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL-NEXT: br label [[COMPLEX_MUL_CONT]] +// FULL: complex_mul_cont: +// FULL-NEXT: [[REAL_MUL_PHI:%.*]] = phi float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL-NEXT: [[IMAG_MUL_PHI:%.*]] = phi float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], 
[[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half +// FULL-NEXT: [[UNPROMOTION5:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half +// FULL-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// FULL-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// FULL-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// FULL-NEXT: store half [[UNPROMOTION5]], ptr [[RETVAL_IMAGP]], align 2 +// FULL-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// FULL-NEXT: ret <2 x half> [[TMP0]] +// +// BASIC-LABEL: define dso_local <2 x half> @mulf16( +// BASIC-SAME: <2 x half> noundef [[A_COERCE:%.*]], <2 x half> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// BASIC-NEXT: entry: +// BASIC-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// BASIC-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// BASIC-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// BASIC-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// BASIC-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// BASIC-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// BASIC-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// BASIC-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// BASIC-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// BASIC-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// BASIC-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// BASIC-NEXT: 
[[EXT2:%.*]] = fpext half [[B_REAL]] to float +// BASIC-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// BASIC-NEXT: [[MUL_AC:%.*]] = fmul float [[EXT]], [[EXT2]] +// BASIC-NEXT: [[MUL_BD:%.*]] = fmul float [[EXT1]], [[EXT3]] +// BASIC-NEXT: [[MUL_AD:%.*]] = fmul float [[EXT]], [[EXT3]] +// BASIC-NEXT: [[MUL_BC:%.*]] = fmul float [[EXT1]], [[EXT2]] +// BASIC-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// BASIC-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// BASIC-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half +// BASIC-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[MUL_I]] to half +// BASIC-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// BASIC-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// BASIC-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// BASIC-NEXT: ret <2 x half> [[TMP0]] +// +// IMPRVD-LABEL: define dso_local <2 x half> @mulf16( +// IMPRVD-SAME: <2 x half> noundef [[A_COERCE:%.*]], <2 x half> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// IMPRVD-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// IMPRVD-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// IMPRVD-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// IMPRVD-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// IMPRVD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// IMPRVD-NEXT: [[EXT:%.*]] = fpext half 
[[A_REAL]] to float +// IMPRVD-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// IMPRVD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// IMPRVD-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// IMPRVD-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// IMPRVD-NEXT: [[MUL_AC:%.*]] = fmul float [[EXT]], [[EXT2]] +// IMPRVD-NEXT: [[MUL_BD:%.*]] = fmul float [[EXT1]], [[EXT3]] +// IMPRVD-NEXT: [[MUL_AD:%.*]] = fmul float [[EXT]], [[EXT3]] +// IMPRVD-NEXT: [[MUL_BC:%.*]] = fmul float [[EXT1]], [[EXT2]] +// IMPRVD-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// IMPRVD-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// IMPRVD-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half +// IMPRVD-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[MUL_I]] to half +// IMPRVD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// IMPRVD-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// IMPRVD-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// IMPRVD-NEXT: ret <2 x half> [[TMP0]] +// +// PRMTD-LABEL: define dso_local <2 x half> @mulf16( +// PRMTD-SAME: <2 x half> noundef [[A_COERCE:%.*]], <2 x half> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// PRMTD-NEXT: entry: +// PRMTD-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// PRMTD-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// PRMTD-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// PRMTD-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// 
PRMTD-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// PRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// PRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// PRMTD-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// PRMTD-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// PRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// PRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// PRMTD-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// PRMTD-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// PRMTD-NEXT: [[MUL_AC:%.*]] = fmul float [[EXT]], [[EXT2]] +// PRMTD-NEXT: [[MUL_BD:%.*]] = fmul float [[EXT1]], [[EXT3]] +// PRMTD-NEXT: [[MUL_AD:%.*]] = fmul float [[EXT]], [[EXT3]] +// PRMTD-NEXT: [[MUL_BC:%.*]] = fmul float [[EXT1]], [[EXT2]] +// PRMTD-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// PRMTD-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// PRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half +// PRMTD-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[MUL_I]] to half +// PRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// PRMTD-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// PRMTD-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// PRMTD-NEXT: ret <2 x half> [[TMP0]] +// +// X86WINPRMTD-LABEL: 
define dso_local i32 @mulf16( +// X86WINPRMTD-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// X86WINPRMTD-NEXT: entry: +// X86WINPRMTD-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86WINPRMTD-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86WINPRMTD-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// X86WINPRMTD-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 2 +// X86WINPRMTD-NEXT: store i32 [[B_COERCE]], ptr [[B]], align 2 +// X86WINPRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86WINPRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86WINPRMTD-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86WINPRMTD-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// X86WINPRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// X86WINPRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// X86WINPRMTD-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// X86WINPRMTD-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// X86WINPRMTD-NEXT: [[MUL_AC:%.*]] = fmul float [[EXT]], [[EXT2]] +// X86WINPRMTD-NEXT: [[MUL_BD:%.*]] = fmul float [[EXT1]], [[EXT3]] +// X86WINPRMTD-NEXT: [[MUL_AD:%.*]] = fmul float [[EXT]], [[EXT3]] +// X86WINPRMTD-NEXT: [[MUL_BC:%.*]] = fmul float [[EXT1]], [[EXT2]] +// X86WINPRMTD-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// X86WINPRMTD-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// X86WINPRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half +// X86WINPRMTD-NEXT: 
[[UNPROMOTION4:%.*]] = fptrunc float [[MUL_I]] to half +// X86WINPRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86WINPRMTD-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// X86WINPRMTD-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 2 +// X86WINPRMTD-NEXT: ret i32 [[TMP0]] +// +// BASIC_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @mulf16( +// BASIC_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x half> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0]] { +// BASIC_FAST-NEXT: entry: +// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// BASIC_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// BASIC_FAST-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// BASIC_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// BASIC_FAST-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// BASIC_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// BASIC_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// BASIC_FAST-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// BASIC_FAST-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// BASIC_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// BASIC_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// 
BASIC_FAST-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// BASIC_FAST-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// BASIC_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT2]] +// BASIC_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT3]] +// BASIC_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT3]] +// BASIC_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT2]] +// BASIC_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[MUL_AC]], [[MUL_BD]] +// BASIC_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[MUL_AD]], [[MUL_BC]] +// BASIC_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half +// BASIC_FAST-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[MUL_I]] to half +// BASIC_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// BASIC_FAST-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// BASIC_FAST-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// BASIC_FAST-NEXT: ret <2 x half> [[TMP0]] +// +// FULL_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @mulf16( +// FULL_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x half> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0]] { +// FULL_FAST-NEXT: entry: +// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// FULL_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// FULL_FAST-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// FULL_FAST-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// FULL_FAST-NEXT: store <2 
x half> [[B_COERCE]], ptr [[B]], align 2 +// FULL_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// FULL_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// FULL_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// FULL_FAST-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// FULL_FAST-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// FULL_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// FULL_FAST-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// FULL_FAST-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// FULL_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT2]] +// FULL_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT3]] +// FULL_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT3]] +// FULL_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT2]] +// FULL_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[MUL_AC]], [[MUL_BD]] +// FULL_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[MUL_AD]], [[MUL_BC]] +// FULL_FAST-NEXT: [[ISNAN_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno float [[MUL_R]], [[MUL_R]] +// FULL_FAST-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] +// FULL_FAST: complex_mul_imag_nan: +// FULL_FAST-NEXT: [[ISNAN_CMP4:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno float [[MUL_I]], [[MUL_I]] +// FULL_FAST-NEXT: br 
i1 [[ISNAN_CMP4]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// FULL_FAST: complex_mul_libcall: +// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) <2 x float> @__mulsc3(float noundef nofpclass(nan inf) [[EXT]], float noundef nofpclass(nan inf) [[EXT1]], float noundef nofpclass(nan inf) [[EXT2]], float noundef nofpclass(nan inf) [[EXT3]]) #[[ATTR1]] +// FULL_FAST-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// FULL_FAST-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL_FAST-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL_FAST-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL_FAST-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL_FAST-NEXT: br label [[COMPLEX_MUL_CONT]] +// FULL_FAST: complex_mul_cont: +// FULL_FAST-NEXT: [[REAL_MUL_PHI:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL_FAST-NEXT: [[IMAG_MUL_PHI:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half +// FULL_FAST-NEXT: [[UNPROMOTION5:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half +// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// FULL_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// FULL_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// FULL_FAST-NEXT: store half [[UNPROMOTION5]], ptr [[RETVAL_IMAGP]], align 2 +// FULL_FAST-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// 
FULL_FAST-NEXT: ret <2 x half> [[TMP0]] +// +// IMPRVD_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @mulf16( +// IMPRVD_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x half> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0]] { +// IMPRVD_FAST-NEXT: entry: +// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// IMPRVD_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// IMPRVD_FAST-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// IMPRVD_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// IMPRVD_FAST-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// IMPRVD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// IMPRVD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// IMPRVD_FAST-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// IMPRVD_FAST-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// IMPRVD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// IMPRVD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// IMPRVD_FAST-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// IMPRVD_FAST-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// IMPRVD_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT2]] +// IMPRVD_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT3]] +// IMPRVD_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT3]] +// IMPRVD_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn 
float [[EXT1]], [[EXT2]] +// IMPRVD_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[MUL_AC]], [[MUL_BD]] +// IMPRVD_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[MUL_AD]], [[MUL_BC]] +// IMPRVD_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half +// IMPRVD_FAST-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[MUL_I]] to half +// IMPRVD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// IMPRVD_FAST-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// IMPRVD_FAST-NEXT: ret <2 x half> [[TMP0]] +// +// PRMTD_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @mulf16( +// PRMTD_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x half> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0]] { +// PRMTD_FAST-NEXT: entry: +// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// PRMTD_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// PRMTD_FAST-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// PRMTD_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// PRMTD_FAST-NEXT: store <2 x half> [[B_COERCE]], ptr [[B]], align 2 +// PRMTD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// PRMTD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// PRMTD_FAST-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// PRMTD_FAST-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// PRMTD_FAST-NEXT: 
[[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// PRMTD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// PRMTD_FAST-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// PRMTD_FAST-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// PRMTD_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT2]] +// PRMTD_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT3]] +// PRMTD_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT3]] +// PRMTD_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT1]], [[EXT2]] +// PRMTD_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[MUL_AC]], [[MUL_BD]] +// PRMTD_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[MUL_AD]], [[MUL_BC]] +// PRMTD_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_R]] to half +// PRMTD_FAST-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[MUL_I]] to half +// PRMTD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// PRMTD_FAST-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// PRMTD_FAST-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// PRMTD_FAST-NEXT: ret <2 x half> [[TMP0]] +// +_Complex _Float16 mulf16(_Complex _Float16 a, _Complex _Float16 b) { + return a * b; +} + +// FULL-LABEL: define dso_local <2 x half> @f1( +// FULL-SAME: <2 x half> noundef [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x half> noundef 
[[C_COERCE:%.*]]) #[[ATTR0]] { +// FULL-NEXT: entry: +// FULL-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// FULL-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// FULL-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// FULL-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// FULL-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 +// FULL-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// FULL-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// FULL-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// FULL-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 +// FULL-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1 +// FULL-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 +// FULL-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80 +// FULL-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80 +// FULL-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef [[B_REAL]], x86_fp80 noundef [[B_IMAG]], x86_fp80 noundef [[CONV]], x86_fp80 noundef [[CONV1]]) #[[ATTR1]] +// FULL-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0 +// FULL-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 +// FULL-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to half +// FULL-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP1]] to half +// FULL-NEXT: [[EXT:%.*]] = fpext half [[CONV2]] to float +// FULL-NEXT: [[EXT4:%.*]] = fpext half [[CONV3]] to float +// FULL-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 
+// FULL-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// FULL-NEXT: [[EXT5:%.*]] = fpext half [[A_REAL]] to float +// FULL-NEXT: [[EXT6:%.*]] = fpext half [[A_IMAG]] to float +// FULL-NEXT: [[CALL7:%.*]] = call <2 x float> @__divsc3(float noundef [[EXT]], float noundef [[EXT4]], float noundef [[EXT5]], float noundef [[EXT6]]) #[[ATTR1]] +// FULL-NEXT: store <2 x float> [[CALL7]], ptr [[COERCE]], align 4 +// FULL-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[COERCE_REAL]] to half +// FULL-NEXT: [[UNPROMOTION8:%.*]] = fptrunc float [[COERCE_IMAG]] to half +// FULL-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// FULL-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// FULL-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// FULL-NEXT: store half [[UNPROMOTION8]], ptr [[RETVAL_IMAGP]], align 2 +// FULL-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// FULL-NEXT: ret <2 x half> [[TMP2]] +// +// BASIC-LABEL: define dso_local <2 x half> @f1( +// BASIC-SAME: <2 x half> noundef [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x half> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// BASIC-NEXT: entry: +// BASIC-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// BASIC-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// BASIC-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// BASIC-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 
2 +// BASIC-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 +// BASIC-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// BASIC-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// BASIC-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// BASIC-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 +// BASIC-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1 +// BASIC-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 +// BASIC-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80 +// BASIC-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80 +// BASIC-NEXT: [[TMP0:%.*]] = fmul x86_fp80 [[B_REAL]], [[CONV]] +// BASIC-NEXT: [[TMP1:%.*]] = fmul x86_fp80 [[B_IMAG]], [[CONV1]] +// BASIC-NEXT: [[TMP2:%.*]] = fadd x86_fp80 [[TMP0]], [[TMP1]] +// BASIC-NEXT: [[TMP3:%.*]] = fmul x86_fp80 [[CONV]], [[CONV]] +// BASIC-NEXT: [[TMP4:%.*]] = fmul x86_fp80 [[CONV1]], [[CONV1]] +// BASIC-NEXT: [[TMP5:%.*]] = fadd x86_fp80 [[TMP3]], [[TMP4]] +// BASIC-NEXT: [[TMP6:%.*]] = fmul x86_fp80 [[B_IMAG]], [[CONV]] +// BASIC-NEXT: [[TMP7:%.*]] = fmul x86_fp80 [[B_REAL]], [[CONV1]] +// BASIC-NEXT: [[TMP8:%.*]] = fsub x86_fp80 [[TMP6]], [[TMP7]] +// BASIC-NEXT: [[TMP9:%.*]] = fdiv x86_fp80 [[TMP2]], [[TMP5]] +// BASIC-NEXT: [[TMP10:%.*]] = fdiv x86_fp80 [[TMP8]], [[TMP5]] +// BASIC-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP9]] to half +// BASIC-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP10]] to half +// BASIC-NEXT: [[EXT:%.*]] = fpext half [[CONV2]] to float +// BASIC-NEXT: [[EXT4:%.*]] = fpext half [[CONV3]] to float +// BASIC-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: [[A_REAL:%.*]] = load 
half, ptr [[A_REALP]], align 2 +// BASIC-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// BASIC-NEXT: [[EXT5:%.*]] = fpext half [[A_REAL]] to float +// BASIC-NEXT: [[EXT6:%.*]] = fpext half [[A_IMAG]] to float +// BASIC-NEXT: [[TMP11:%.*]] = fmul float [[EXT]], [[EXT5]] +// BASIC-NEXT: [[TMP12:%.*]] = fmul float [[EXT4]], [[EXT6]] +// BASIC-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]] +// BASIC-NEXT: [[TMP14:%.*]] = fmul float [[EXT5]], [[EXT5]] +// BASIC-NEXT: [[TMP15:%.*]] = fmul float [[EXT6]], [[EXT6]] +// BASIC-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]] +// BASIC-NEXT: [[TMP17:%.*]] = fmul float [[EXT4]], [[EXT5]] +// BASIC-NEXT: [[TMP18:%.*]] = fmul float [[EXT]], [[EXT6]] +// BASIC-NEXT: [[TMP19:%.*]] = fsub float [[TMP17]], [[TMP18]] +// BASIC-NEXT: [[TMP20:%.*]] = fdiv float [[TMP13]], [[TMP16]] +// BASIC-NEXT: [[TMP21:%.*]] = fdiv float [[TMP19]], [[TMP16]] +// BASIC-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP20]] to half +// BASIC-NEXT: [[UNPROMOTION7:%.*]] = fptrunc float [[TMP21]] to half +// BASIC-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// BASIC-NEXT: store half [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 2 +// BASIC-NEXT: [[TMP22:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// BASIC-NEXT: ret <2 x half> [[TMP22]] +// +// IMPRVD-LABEL: define dso_local <2 x half> @f1( +// IMPRVD-SAME: <2 x half> noundef [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x half> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// IMPRVD-NEXT: [[A:%.*]] = alloca { half, half }, align 2 
+// IMPRVD-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// IMPRVD-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// IMPRVD-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 +// IMPRVD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// IMPRVD-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// IMPRVD-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 +// IMPRVD-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1 +// IMPRVD-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 +// IMPRVD-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80 +// IMPRVD-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80 +// IMPRVD-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) +// IMPRVD-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) +// IMPRVD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt x86_fp80 [[TMP0]], [[TMP1]] +// IMPRVD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD-NEXT: [[TMP2:%.*]] = fdiv x86_fp80 [[CONV1]], [[CONV]] +// IMPRVD-NEXT: [[TMP3:%.*]] = fmul x86_fp80 [[TMP2]], [[CONV1]] +// IMPRVD-NEXT: [[TMP4:%.*]] = fadd x86_fp80 [[CONV]], [[TMP3]] +// IMPRVD-NEXT: [[TMP5:%.*]] = fmul x86_fp80 [[B_IMAG]], [[TMP2]] +// IMPRVD-NEXT: [[TMP6:%.*]] = fadd x86_fp80 [[B_REAL]], [[TMP5]] +// IMPRVD-NEXT: [[TMP7:%.*]] = fdiv x86_fp80 [[TMP6]], [[TMP4]] +// IMPRVD-NEXT: [[TMP8:%.*]] = fmul x86_fp80 [[B_REAL]], [[TMP2]] +// IMPRVD-NEXT: [[TMP9:%.*]] = fsub x86_fp80 [[B_IMAG]], [[TMP8]] +// IMPRVD-NEXT: [[TMP10:%.*]] = 
fdiv x86_fp80 [[TMP9]], [[TMP4]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD: abs_rhsr_less_than_abs_rhsi: +// IMPRVD-NEXT: [[TMP11:%.*]] = fdiv x86_fp80 [[CONV]], [[CONV1]] +// IMPRVD-NEXT: [[TMP12:%.*]] = fmul x86_fp80 [[TMP11]], [[CONV]] +// IMPRVD-NEXT: [[TMP13:%.*]] = fadd x86_fp80 [[CONV1]], [[TMP12]] +// IMPRVD-NEXT: [[TMP14:%.*]] = fmul x86_fp80 [[B_REAL]], [[TMP11]] +// IMPRVD-NEXT: [[TMP15:%.*]] = fadd x86_fp80 [[TMP14]], [[B_IMAG]] +// IMPRVD-NEXT: [[TMP16:%.*]] = fdiv x86_fp80 [[TMP15]], [[TMP13]] +// IMPRVD-NEXT: [[TMP17:%.*]] = fmul x86_fp80 [[B_IMAG]], [[TMP11]] +// IMPRVD-NEXT: [[TMP18:%.*]] = fsub x86_fp80 [[TMP17]], [[B_REAL]] +// IMPRVD-NEXT: [[TMP19:%.*]] = fdiv x86_fp80 [[TMP18]], [[TMP13]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD: complex_div: +// IMPRVD-NEXT: [[TMP20:%.*]] = phi x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[TMP21:%.*]] = phi x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP20]] to half +// IMPRVD-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP21]] to half +// IMPRVD-NEXT: [[EXT:%.*]] = fpext half [[CONV2]] to float +// IMPRVD-NEXT: [[EXT4:%.*]] = fpext half [[CONV3]] to float +// IMPRVD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// IMPRVD-NEXT: [[EXT5:%.*]] = fpext half [[A_REAL]] to float +// IMPRVD-NEXT: [[EXT6:%.*]] = fpext half [[A_IMAG]] to float +// IMPRVD-NEXT: [[TMP22:%.*]] = call float @llvm.fabs.f32(float [[EXT5]]) +// IMPRVD-NEXT: [[TMP23:%.*]] = call float @llvm.fabs.f32(float [[EXT6]]) +// IMPRVD-NEXT: 
[[ABS_CMP7:%.*]] = fcmp ugt float [[TMP22]], [[TMP23]] +// IMPRVD-NEXT: br i1 [[ABS_CMP7]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI8:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI9:%.*]] +// IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi8: +// IMPRVD-NEXT: [[TMP24:%.*]] = fdiv float [[EXT6]], [[EXT5]] +// IMPRVD-NEXT: [[TMP25:%.*]] = fmul float [[TMP24]], [[EXT6]] +// IMPRVD-NEXT: [[TMP26:%.*]] = fadd float [[EXT5]], [[TMP25]] +// IMPRVD-NEXT: [[TMP27:%.*]] = fmul float [[EXT4]], [[TMP24]] +// IMPRVD-NEXT: [[TMP28:%.*]] = fadd float [[EXT]], [[TMP27]] +// IMPRVD-NEXT: [[TMP29:%.*]] = fdiv float [[TMP28]], [[TMP26]] +// IMPRVD-NEXT: [[TMP30:%.*]] = fmul float [[EXT]], [[TMP24]] +// IMPRVD-NEXT: [[TMP31:%.*]] = fsub float [[EXT4]], [[TMP30]] +// IMPRVD-NEXT: [[TMP32:%.*]] = fdiv float [[TMP31]], [[TMP26]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV10:%.*]] +// IMPRVD: abs_rhsr_less_than_abs_rhsi9: +// IMPRVD-NEXT: [[TMP33:%.*]] = fdiv float [[EXT5]], [[EXT6]] +// IMPRVD-NEXT: [[TMP34:%.*]] = fmul float [[TMP33]], [[EXT5]] +// IMPRVD-NEXT: [[TMP35:%.*]] = fadd float [[EXT6]], [[TMP34]] +// IMPRVD-NEXT: [[TMP36:%.*]] = fmul float [[EXT]], [[TMP33]] +// IMPRVD-NEXT: [[TMP37:%.*]] = fadd float [[TMP36]], [[EXT4]] +// IMPRVD-NEXT: [[TMP38:%.*]] = fdiv float [[TMP37]], [[TMP35]] +// IMPRVD-NEXT: [[TMP39:%.*]] = fmul float [[EXT4]], [[TMP33]] +// IMPRVD-NEXT: [[TMP40:%.*]] = fsub float [[TMP39]], [[EXT]] +// IMPRVD-NEXT: [[TMP41:%.*]] = fdiv float [[TMP40]], [[TMP35]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV10]] +// IMPRVD: complex_div10: +// IMPRVD-NEXT: [[TMP42:%.*]] = phi float [ [[TMP29]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI8]] ], [ [[TMP38]], [[ABS_RHSR_LESS_THAN_ABS_RHSI9]] ] +// IMPRVD-NEXT: [[TMP43:%.*]] = phi float [ [[TMP32]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI8]] ], [ [[TMP41]], [[ABS_RHSR_LESS_THAN_ABS_RHSI9]] ] +// IMPRVD-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP42]] to half +// IMPRVD-NEXT: [[UNPROMOTION11:%.*]] = fptrunc float [[TMP43]] to half +// 
IMPRVD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// IMPRVD-NEXT: store half [[UNPROMOTION11]], ptr [[RETVAL_IMAGP]], align 2 +// IMPRVD-NEXT: [[TMP44:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// IMPRVD-NEXT: ret <2 x half> [[TMP44]] +// +// PRMTD-LABEL: define dso_local <2 x half> @f1( +// PRMTD-SAME: <2 x half> noundef [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x half> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// PRMTD-NEXT: entry: +// PRMTD-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// PRMTD-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// PRMTD-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// PRMTD-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// PRMTD-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 +// PRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// PRMTD-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 +// PRMTD-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1 +// PRMTD-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 +// PRMTD-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80 +// PRMTD-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80 +// PRMTD-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) +// PRMTD-NEXT: [[TMP1:%.*]] = call x86_fp80 
@llvm.fabs.f80(x86_fp80 [[CONV1]]) +// PRMTD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt x86_fp80 [[TMP0]], [[TMP1]] +// PRMTD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// PRMTD: abs_rhsr_greater_or_equal_abs_rhsi: +// PRMTD-NEXT: [[TMP2:%.*]] = fdiv x86_fp80 [[CONV1]], [[CONV]] +// PRMTD-NEXT: [[TMP3:%.*]] = fmul x86_fp80 [[TMP2]], [[CONV1]] +// PRMTD-NEXT: [[TMP4:%.*]] = fadd x86_fp80 [[CONV]], [[TMP3]] +// PRMTD-NEXT: [[TMP5:%.*]] = fmul x86_fp80 [[B_IMAG]], [[TMP2]] +// PRMTD-NEXT: [[TMP6:%.*]] = fadd x86_fp80 [[B_REAL]], [[TMP5]] +// PRMTD-NEXT: [[TMP7:%.*]] = fdiv x86_fp80 [[TMP6]], [[TMP4]] +// PRMTD-NEXT: [[TMP8:%.*]] = fmul x86_fp80 [[B_REAL]], [[TMP2]] +// PRMTD-NEXT: [[TMP9:%.*]] = fsub x86_fp80 [[B_IMAG]], [[TMP8]] +// PRMTD-NEXT: [[TMP10:%.*]] = fdiv x86_fp80 [[TMP9]], [[TMP4]] +// PRMTD-NEXT: br label [[COMPLEX_DIV:%.*]] +// PRMTD: abs_rhsr_less_than_abs_rhsi: +// PRMTD-NEXT: [[TMP11:%.*]] = fdiv x86_fp80 [[CONV]], [[CONV1]] +// PRMTD-NEXT: [[TMP12:%.*]] = fmul x86_fp80 [[TMP11]], [[CONV]] +// PRMTD-NEXT: [[TMP13:%.*]] = fadd x86_fp80 [[CONV1]], [[TMP12]] +// PRMTD-NEXT: [[TMP14:%.*]] = fmul x86_fp80 [[B_REAL]], [[TMP11]] +// PRMTD-NEXT: [[TMP15:%.*]] = fadd x86_fp80 [[TMP14]], [[B_IMAG]] +// PRMTD-NEXT: [[TMP16:%.*]] = fdiv x86_fp80 [[TMP15]], [[TMP13]] +// PRMTD-NEXT: [[TMP17:%.*]] = fmul x86_fp80 [[B_IMAG]], [[TMP11]] +// PRMTD-NEXT: [[TMP18:%.*]] = fsub x86_fp80 [[TMP17]], [[B_REAL]] +// PRMTD-NEXT: [[TMP19:%.*]] = fdiv x86_fp80 [[TMP18]], [[TMP13]] +// PRMTD-NEXT: br label [[COMPLEX_DIV]] +// PRMTD: complex_div: +// PRMTD-NEXT: [[TMP20:%.*]] = phi x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD-NEXT: [[TMP21:%.*]] = phi x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP20]] to half +// PRMTD-NEXT: 
[[CONV3:%.*]] = fptrunc x86_fp80 [[TMP21]] to half +// PRMTD-NEXT: [[EXT:%.*]] = fpext half [[CONV2]] to float +// PRMTD-NEXT: [[EXT4:%.*]] = fpext half [[CONV3]] to float +// PRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// PRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// PRMTD-NEXT: [[EXT5:%.*]] = fpext half [[A_REAL]] to float +// PRMTD-NEXT: [[EXT6:%.*]] = fpext half [[A_IMAG]] to float +// PRMTD-NEXT: [[TMP22:%.*]] = fmul float [[EXT]], [[EXT5]] +// PRMTD-NEXT: [[TMP23:%.*]] = fmul float [[EXT4]], [[EXT6]] +// PRMTD-NEXT: [[TMP24:%.*]] = fadd float [[TMP22]], [[TMP23]] +// PRMTD-NEXT: [[TMP25:%.*]] = fmul float [[EXT5]], [[EXT5]] +// PRMTD-NEXT: [[TMP26:%.*]] = fmul float [[EXT6]], [[EXT6]] +// PRMTD-NEXT: [[TMP27:%.*]] = fadd float [[TMP25]], [[TMP26]] +// PRMTD-NEXT: [[TMP28:%.*]] = fmul float [[EXT4]], [[EXT5]] +// PRMTD-NEXT: [[TMP29:%.*]] = fmul float [[EXT]], [[EXT6]] +// PRMTD-NEXT: [[TMP30:%.*]] = fsub float [[TMP28]], [[TMP29]] +// PRMTD-NEXT: [[TMP31:%.*]] = fdiv float [[TMP24]], [[TMP27]] +// PRMTD-NEXT: [[TMP32:%.*]] = fdiv float [[TMP30]], [[TMP27]] +// PRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP31]] to half +// PRMTD-NEXT: [[UNPROMOTION7:%.*]] = fptrunc float [[TMP32]] to half +// PRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// PRMTD-NEXT: store half [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 2 +// PRMTD-NEXT: [[TMP33:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// PRMTD-NEXT: ret <2 x half> [[TMP33]] +// +// X86WINPRMTD-LABEL: define dso_local i32 @f1( +// 
X86WINPRMTD-SAME: i32 noundef [[A_COERCE:%.*]], ptr noundef [[B:%.*]], i32 noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// X86WINPRMTD-NEXT: entry: +// X86WINPRMTD-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86WINPRMTD-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86WINPRMTD-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// X86WINPRMTD-NEXT: [[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 2 +// X86WINPRMTD-NEXT: store i32 [[C_COERCE]], ptr [[C]], align 2 +// X86WINPRMTD-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 +// X86WINPRMTD-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 +// X86WINPRMTD-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to double +// X86WINPRMTD-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to double +// X86WINPRMTD-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) +// X86WINPRMTD-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) +// X86WINPRMTD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt double [[TMP0]], [[TMP1]] +// X86WINPRMTD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// X86WINPRMTD: abs_rhsr_greater_or_equal_abs_rhsi: +// X86WINPRMTD-NEXT: [[TMP2:%.*]] = fdiv double [[CONV1]], [[CONV]] +// X86WINPRMTD-NEXT: 
[[TMP3:%.*]] = fmul double [[TMP2]], [[CONV1]] +// X86WINPRMTD-NEXT: [[TMP4:%.*]] = fadd double [[CONV]], [[TMP3]] +// X86WINPRMTD-NEXT: [[TMP5:%.*]] = fmul double [[B_IMAG]], [[TMP2]] +// X86WINPRMTD-NEXT: [[TMP6:%.*]] = fadd double [[B_REAL]], [[TMP5]] +// X86WINPRMTD-NEXT: [[TMP7:%.*]] = fdiv double [[TMP6]], [[TMP4]] +// X86WINPRMTD-NEXT: [[TMP8:%.*]] = fmul double [[B_REAL]], [[TMP2]] +// X86WINPRMTD-NEXT: [[TMP9:%.*]] = fsub double [[B_IMAG]], [[TMP8]] +// X86WINPRMTD-NEXT: [[TMP10:%.*]] = fdiv double [[TMP9]], [[TMP4]] +// X86WINPRMTD-NEXT: br label [[COMPLEX_DIV:%.*]] +// X86WINPRMTD: abs_rhsr_less_than_abs_rhsi: +// X86WINPRMTD-NEXT: [[TMP11:%.*]] = fdiv double [[CONV]], [[CONV1]] +// X86WINPRMTD-NEXT: [[TMP12:%.*]] = fmul double [[TMP11]], [[CONV]] +// X86WINPRMTD-NEXT: [[TMP13:%.*]] = fadd double [[CONV1]], [[TMP12]] +// X86WINPRMTD-NEXT: [[TMP14:%.*]] = fmul double [[B_REAL]], [[TMP11]] +// X86WINPRMTD-NEXT: [[TMP15:%.*]] = fadd double [[TMP14]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[TMP16:%.*]] = fdiv double [[TMP15]], [[TMP13]] +// X86WINPRMTD-NEXT: [[TMP17:%.*]] = fmul double [[B_IMAG]], [[TMP11]] +// X86WINPRMTD-NEXT: [[TMP18:%.*]] = fsub double [[TMP17]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[TMP19:%.*]] = fdiv double [[TMP18]], [[TMP13]] +// X86WINPRMTD-NEXT: br label [[COMPLEX_DIV]] +// X86WINPRMTD: complex_div: +// X86WINPRMTD-NEXT: [[TMP20:%.*]] = phi double [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD-NEXT: [[TMP21:%.*]] = phi double [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD-NEXT: [[CONV2:%.*]] = fptrunc double [[TMP20]] to half +// X86WINPRMTD-NEXT: [[CONV3:%.*]] = fptrunc double [[TMP21]] to half +// X86WINPRMTD-NEXT: [[EXT:%.*]] = fpext half [[CONV2]] to float +// X86WINPRMTD-NEXT: [[EXT4:%.*]] = fpext half [[CONV3]] to float +// X86WINPRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half 
}, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86WINPRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86WINPRMTD-NEXT: [[EXT5:%.*]] = fpext half [[A_REAL]] to float +// X86WINPRMTD-NEXT: [[EXT6:%.*]] = fpext half [[A_IMAG]] to float +// X86WINPRMTD-NEXT: [[TMP22:%.*]] = fmul float [[EXT]], [[EXT5]] +// X86WINPRMTD-NEXT: [[TMP23:%.*]] = fmul float [[EXT4]], [[EXT6]] +// X86WINPRMTD-NEXT: [[TMP24:%.*]] = fadd float [[TMP22]], [[TMP23]] +// X86WINPRMTD-NEXT: [[TMP25:%.*]] = fmul float [[EXT5]], [[EXT5]] +// X86WINPRMTD-NEXT: [[TMP26:%.*]] = fmul float [[EXT6]], [[EXT6]] +// X86WINPRMTD-NEXT: [[TMP27:%.*]] = fadd float [[TMP25]], [[TMP26]] +// X86WINPRMTD-NEXT: [[TMP28:%.*]] = fmul float [[EXT4]], [[EXT5]] +// X86WINPRMTD-NEXT: [[TMP29:%.*]] = fmul float [[EXT]], [[EXT6]] +// X86WINPRMTD-NEXT: [[TMP30:%.*]] = fsub float [[TMP28]], [[TMP29]] +// X86WINPRMTD-NEXT: [[TMP31:%.*]] = fdiv float [[TMP24]], [[TMP27]] +// X86WINPRMTD-NEXT: [[TMP32:%.*]] = fdiv float [[TMP30]], [[TMP27]] +// X86WINPRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP31]] to half +// X86WINPRMTD-NEXT: [[UNPROMOTION7:%.*]] = fptrunc float [[TMP32]] to half +// X86WINPRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86WINPRMTD-NEXT: store half [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 2 +// X86WINPRMTD-NEXT: [[TMP33:%.*]] = load i32, ptr [[RETVAL]], align 2 +// X86WINPRMTD-NEXT: ret i32 [[TMP33]] +// +// BASIC_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @f1( +// BASIC_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], ptr noundef byval({ 
x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR0]] { +// BASIC_FAST-NEXT: entry: +// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// BASIC_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// BASIC_FAST-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// BASIC_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// BASIC_FAST-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 +// BASIC_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// BASIC_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// BASIC_FAST-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 +// BASIC_FAST-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 +// BASIC_FAST-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80 +// BASIC_FAST-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80 +// BASIC_FAST-NEXT: [[TMP0:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[CONV]] +// BASIC_FAST-NEXT: [[TMP1:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[CONV1]] +// BASIC_FAST-NEXT: [[TMP2:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP0]], [[TMP1]] +// BASIC_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV]], [[CONV]] +// BASIC_FAST-NEXT: [[TMP4:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV1]], [[CONV1]] +// BASIC_FAST-NEXT: [[TMP5:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP3]], [[TMP4]] +// BASIC_FAST-NEXT: [[TMP6:%.*]] = fmul 
reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[CONV]] +// BASIC_FAST-NEXT: [[TMP7:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[CONV1]] +// BASIC_FAST-NEXT: [[TMP8:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP6]], [[TMP7]] +// BASIC_FAST-NEXT: [[TMP9:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP2]], [[TMP5]] +// BASIC_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP8]], [[TMP5]] +// BASIC_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP9]] to half +// BASIC_FAST-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP10]] to half +// BASIC_FAST-NEXT: [[EXT:%.*]] = fpext half [[CONV2]] to float +// BASIC_FAST-NEXT: [[EXT4:%.*]] = fpext half [[CONV3]] to float +// BASIC_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// BASIC_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// BASIC_FAST-NEXT: [[EXT5:%.*]] = fpext half [[A_REAL]] to float +// BASIC_FAST-NEXT: [[EXT6:%.*]] = fpext half [[A_IMAG]] to float +// BASIC_FAST-NEXT: [[TMP11:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT5]] +// BASIC_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT4]], [[EXT6]] +// BASIC_FAST-NEXT: [[TMP13:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP11]], [[TMP12]] +// BASIC_FAST-NEXT: [[TMP14:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT5]], [[EXT5]] +// BASIC_FAST-NEXT: [[TMP15:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT6]], [[EXT6]] +// BASIC_FAST-NEXT: [[TMP16:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP14]], [[TMP15]] +// BASIC_FAST-NEXT: [[TMP17:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT4]], [[EXT5]] +// BASIC_FAST-NEXT: [[TMP18:%.*]] = fmul reassoc nnan ninf nsz arcp afn float 
[[EXT]], [[EXT6]] +// BASIC_FAST-NEXT: [[TMP19:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[TMP17]], [[TMP18]] +// BASIC_FAST-NEXT: [[TMP20:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP13]], [[TMP16]] +// BASIC_FAST-NEXT: [[TMP21:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP19]], [[TMP16]] +// BASIC_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP20]] to half +// BASIC_FAST-NEXT: [[UNPROMOTION7:%.*]] = fptrunc float [[TMP21]] to half +// BASIC_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// BASIC_FAST-NEXT: store half [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 2 +// BASIC_FAST-NEXT: [[TMP22:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// BASIC_FAST-NEXT: ret <2 x half> [[TMP22]] +// +// FULL_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @f1( +// FULL_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR0]] { +// FULL_FAST-NEXT: entry: +// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// FULL_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// FULL_FAST-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// FULL_FAST-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// FULL_FAST-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 +// FULL_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// 
FULL_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// FULL_FAST-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// FULL_FAST-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 +// FULL_FAST-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1 +// FULL_FAST-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 +// FULL_FAST-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80 +// FULL_FAST-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80 +// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR1]] +// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0 +// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 +// FULL_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to half +// FULL_FAST-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP1]] to half +// FULL_FAST-NEXT: [[EXT:%.*]] = fpext half [[CONV2]] to float +// FULL_FAST-NEXT: [[EXT4:%.*]] = fpext half [[CONV3]] to float +// FULL_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// FULL_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// FULL_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// FULL_FAST-NEXT: [[EXT5:%.*]] = fpext half [[A_REAL]] to float +// FULL_FAST-NEXT: [[EXT6:%.*]] = fpext half [[A_IMAG]] to float +// FULL_FAST-NEXT: [[CALL7:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) <2 x float> @__divsc3(float noundef nofpclass(nan inf) [[EXT]], float noundef nofpclass(nan inf) [[EXT4]], float noundef nofpclass(nan inf) [[EXT5]], float 
noundef nofpclass(nan inf) [[EXT6]]) #[[ATTR1]] +// FULL_FAST-NEXT: store <2 x float> [[CALL7]], ptr [[COERCE]], align 4 +// FULL_FAST-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL_FAST-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL_FAST-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL_FAST-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[COERCE_REAL]] to half +// FULL_FAST-NEXT: [[UNPROMOTION8:%.*]] = fptrunc float [[COERCE_IMAG]] to half +// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// FULL_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// FULL_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// FULL_FAST-NEXT: store half [[UNPROMOTION8]], ptr [[RETVAL_IMAGP]], align 2 +// FULL_FAST-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// FULL_FAST-NEXT: ret <2 x half> [[TMP2]] +// +// IMPRVD_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x half> @f1( +// IMPRVD_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR0]] { +// IMPRVD_FAST-NEXT: entry: +// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// IMPRVD_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// IMPRVD_FAST-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// IMPRVD_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// IMPRVD_FAST-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 +// IMPRVD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[B_REAL:%.*]] = load 
x86_fp80, ptr [[B_REALP]], align 16 +// IMPRVD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// IMPRVD_FAST-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 +// IMPRVD_FAST-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 +// IMPRVD_FAST-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80 +// IMPRVD_FAST-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80 +// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) +// IMPRVD_FAST-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) +// IMPRVD_FAST-NEXT: [[ABS_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt x86_fp80 [[TMP0]], [[TMP1]] +// IMPRVD_FAST-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD_FAST: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP2:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV1]], [[CONV]] +// IMPRVD_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP2]], [[CONV1]] +// IMPRVD_FAST-NEXT: [[TMP4:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV]], [[TMP3]] +// IMPRVD_FAST-NEXT: [[TMP5:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP2]] +// IMPRVD_FAST-NEXT: [[TMP6:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP5]] +// IMPRVD_FAST-NEXT: [[TMP7:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP6]], [[TMP4]] +// IMPRVD_FAST-NEXT: [[TMP8:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP2]] +// IMPRVD_FAST-NEXT: 
[[TMP9:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP8]] +// IMPRVD_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP9]], [[TMP4]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD_FAST: abs_rhsr_less_than_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP11:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV]], [[CONV1]] +// IMPRVD_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP11]], [[CONV]] +// IMPRVD_FAST-NEXT: [[TMP13:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV1]], [[TMP12]] +// IMPRVD_FAST-NEXT: [[TMP14:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP11]] +// IMPRVD_FAST-NEXT: [[TMP15:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP14]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP16:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP15]], [[TMP13]] +// IMPRVD_FAST-NEXT: [[TMP17:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP11]] +// IMPRVD_FAST-NEXT: [[TMP18:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP17]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[TMP19:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP18]], [[TMP13]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD_FAST: complex_div: +// IMPRVD_FAST-NEXT: [[TMP20:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[TMP21:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP20]] to half +// IMPRVD_FAST-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP21]] to half +// IMPRVD_FAST-NEXT: [[EXT:%.*]] = fpext half [[CONV2]] to float +// IMPRVD_FAST-NEXT: [[EXT4:%.*]] = fpext half [[CONV3]] to float +// IMPRVD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr 
inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// IMPRVD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// IMPRVD_FAST-NEXT: [[EXT5:%.*]] = fpext half [[A_REAL]] to float +// IMPRVD_FAST-NEXT: [[EXT6:%.*]] = fpext half [[A_IMAG]] to float +// IMPRVD_FAST-NEXT: [[TMP22:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float [[EXT5]]) +// IMPRVD_FAST-NEXT: [[TMP23:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float [[EXT6]]) +// IMPRVD_FAST-NEXT: [[ABS_CMP7:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt float [[TMP22]], [[TMP23]] +// IMPRVD_FAST-NEXT: br i1 [[ABS_CMP7]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI8:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI9:%.*]] +// IMPRVD_FAST: abs_rhsr_greater_or_equal_abs_rhsi8: +// IMPRVD_FAST-NEXT: [[TMP24:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[EXT6]], [[EXT5]] +// IMPRVD_FAST-NEXT: [[TMP25:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[TMP24]], [[EXT6]] +// IMPRVD_FAST-NEXT: [[TMP26:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[EXT5]], [[TMP25]] +// IMPRVD_FAST-NEXT: [[TMP27:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT4]], [[TMP24]] +// IMPRVD_FAST-NEXT: [[TMP28:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[EXT]], [[TMP27]] +// IMPRVD_FAST-NEXT: [[TMP29:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP28]], [[TMP26]] +// IMPRVD_FAST-NEXT: [[TMP30:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[TMP24]] +// IMPRVD_FAST-NEXT: [[TMP31:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[EXT4]], [[TMP30]] +// IMPRVD_FAST-NEXT: [[TMP32:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP31]], [[TMP26]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV10:%.*]] +// IMPRVD_FAST: abs_rhsr_less_than_abs_rhsi9: +// IMPRVD_FAST-NEXT: [[TMP33:%.*]] = fdiv 
reassoc nnan ninf nsz arcp afn float [[EXT5]], [[EXT6]] +// IMPRVD_FAST-NEXT: [[TMP34:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[TMP33]], [[EXT5]] +// IMPRVD_FAST-NEXT: [[TMP35:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[EXT6]], [[TMP34]] +// IMPRVD_FAST-NEXT: [[TMP36:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[TMP33]] +// IMPRVD_FAST-NEXT: [[TMP37:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP36]], [[EXT4]] +// IMPRVD_FAST-NEXT: [[TMP38:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP37]], [[TMP35]] +// IMPRVD_FAST-NEXT: [[TMP39:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT4]], [[TMP33]] +// IMPRVD_FAST-NEXT: [[TMP40:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[TMP39]], [[EXT]] +// IMPRVD_FAST-NEXT: [[TMP41:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP40]], [[TMP35]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV10]] +// IMPRVD_FAST: complex_div10: +// IMPRVD_FAST-NEXT: [[TMP42:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[TMP29]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI8]] ], [ [[TMP38]], [[ABS_RHSR_LESS_THAN_ABS_RHSI9]] ] +// IMPRVD_FAST-NEXT: [[TMP43:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[TMP32]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI8]] ], [ [[TMP41]], [[ABS_RHSR_LESS_THAN_ABS_RHSI9]] ] +// IMPRVD_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP42]] to half +// IMPRVD_FAST-NEXT: [[UNPROMOTION11:%.*]] = fptrunc float [[TMP43]] to half +// IMPRVD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// IMPRVD_FAST-NEXT: store half [[UNPROMOTION11]], ptr [[RETVAL_IMAGP]], align 2 +// IMPRVD_FAST-NEXT: [[TMP44:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// IMPRVD_FAST-NEXT: ret <2 x half> [[TMP44]] +// +// PRMTD_FAST-LABEL: 
define dso_local nofpclass(nan inf) <2 x half> @f1( +// PRMTD_FAST-SAME: <2 x half> noundef nofpclass(nan inf) [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR0]] { +// PRMTD_FAST-NEXT: entry: +// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// PRMTD_FAST-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// PRMTD_FAST-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// PRMTD_FAST-NEXT: store <2 x half> [[A_COERCE]], ptr [[A]], align 2 +// PRMTD_FAST-NEXT: store <2 x half> [[C_COERCE]], ptr [[C]], align 2 +// PRMTD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD_FAST-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 +// PRMTD_FAST-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 +// PRMTD_FAST-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80 +// PRMTD_FAST-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80 +// PRMTD_FAST-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) +// PRMTD_FAST-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) +// PRMTD_FAST-NEXT: [[ABS_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt x86_fp80 [[TMP0]], [[TMP1]] +// PRMTD_FAST-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// PRMTD_FAST: 
abs_rhsr_greater_or_equal_abs_rhsi: +// PRMTD_FAST-NEXT: [[TMP2:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV1]], [[CONV]] +// PRMTD_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP2]], [[CONV1]] +// PRMTD_FAST-NEXT: [[TMP4:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV]], [[TMP3]] +// PRMTD_FAST-NEXT: [[TMP5:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP2]] +// PRMTD_FAST-NEXT: [[TMP6:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP5]] +// PRMTD_FAST-NEXT: [[TMP7:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP6]], [[TMP4]] +// PRMTD_FAST-NEXT: [[TMP8:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP2]] +// PRMTD_FAST-NEXT: [[TMP9:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP8]] +// PRMTD_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP9]], [[TMP4]] +// PRMTD_FAST-NEXT: br label [[COMPLEX_DIV:%.*]] +// PRMTD_FAST: abs_rhsr_less_than_abs_rhsi: +// PRMTD_FAST-NEXT: [[TMP11:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV]], [[CONV1]] +// PRMTD_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP11]], [[CONV]] +// PRMTD_FAST-NEXT: [[TMP13:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV1]], [[TMP12]] +// PRMTD_FAST-NEXT: [[TMP14:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP11]] +// PRMTD_FAST-NEXT: [[TMP15:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP14]], [[B_IMAG]] +// PRMTD_FAST-NEXT: [[TMP16:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP15]], [[TMP13]] +// PRMTD_FAST-NEXT: [[TMP17:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP11]] +// PRMTD_FAST-NEXT: [[TMP18:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP17]], [[B_REAL]] +// PRMTD_FAST-NEXT: [[TMP19:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP18]], [[TMP13]] +// PRMTD_FAST-NEXT: br 
label [[COMPLEX_DIV]] +// PRMTD_FAST: complex_div: +// PRMTD_FAST-NEXT: [[TMP20:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD_FAST-NEXT: [[TMP21:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP20]] to half +// PRMTD_FAST-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP21]] to half +// PRMTD_FAST-NEXT: [[EXT:%.*]] = fpext half [[CONV2]] to float +// PRMTD_FAST-NEXT: [[EXT4:%.*]] = fpext half [[CONV3]] to float +// PRMTD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// PRMTD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// PRMTD_FAST-NEXT: [[EXT5:%.*]] = fpext half [[A_REAL]] to float +// PRMTD_FAST-NEXT: [[EXT6:%.*]] = fpext half [[A_IMAG]] to float +// PRMTD_FAST-NEXT: [[TMP22:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT5]] +// PRMTD_FAST-NEXT: [[TMP23:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT4]], [[EXT6]] +// PRMTD_FAST-NEXT: [[TMP24:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP22]], [[TMP23]] +// PRMTD_FAST-NEXT: [[TMP25:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT5]], [[EXT5]] +// PRMTD_FAST-NEXT: [[TMP26:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT6]], [[EXT6]] +// PRMTD_FAST-NEXT: [[TMP27:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP25]], [[TMP26]] +// PRMTD_FAST-NEXT: [[TMP28:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT4]], [[EXT5]] +// PRMTD_FAST-NEXT: [[TMP29:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[EXT]], [[EXT6]] +// PRMTD_FAST-NEXT: [[TMP30:%.*]] = 
fsub reassoc nnan ninf nsz arcp afn float [[TMP28]], [[TMP29]] +// PRMTD_FAST-NEXT: [[TMP31:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP24]], [[TMP27]] +// PRMTD_FAST-NEXT: [[TMP32:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP30]], [[TMP27]] +// PRMTD_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP31]] to half +// PRMTD_FAST-NEXT: [[UNPROMOTION7:%.*]] = fptrunc float [[TMP32]] to half +// PRMTD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// PRMTD_FAST-NEXT: store half [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 2 +// PRMTD_FAST-NEXT: [[TMP33:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// PRMTD_FAST-NEXT: ret <2 x half> [[TMP33]] +// +_Complex _Float16 f1(_Complex _Float16 a, _Complex long double b, _Complex _Float16 c) { + return (_Complex _Float16)(b / c) / a; +} diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index 516220a0dbdfe..9ec80252085b8 100644 --- a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ // RUN: -o - | FileCheck %s --check-prefix=FULL @@ -16,10 +17,21 @@ // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown \ // RUN: -complex-range=full -o - | FileCheck %s --check-prefix=FULL +// RUN: %clang_cc1 -triple x86_64-windows-pc -complex-range=promoted \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=X86WINPRMTD + +// RUN: %clang_cc1 -triple=avr-unknown-unknown -mdouble=32 \ +// RUN: -complex-range=promoted -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=AVRFP32 + +// RUN: 
%clang_cc1 -triple=avr-unknown-unknown -mdouble=64 \ +// RUN: -complex-range=promoted -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=AVRFP64 + // Fast math // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ // RUN: -ffast-math -complex-range=basic -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=BASIC +// RUN: | FileCheck %s --check-prefix=BASIC_FAST // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ // RUN: -ffast-math -complex-range=full -emit-llvm -o - %s \ @@ -30,602 +42,3410 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ // RUN: -ffast-math -complex-range=improved -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=IMPRVD +// RUN: | FileCheck %s --check-prefix=IMPRVD_FAST // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ // RUN: -ffast-math -complex-range=promoted -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=PRMTD +// RUN: | FileCheck %s --check-prefix=PRMTD_FAST +// FULL-LABEL: define dso_local <2 x float> @divf( +// FULL-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// FULL-NEXT: entry: +// FULL-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// FULL-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// FULL-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// FULL-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// FULL-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: [[B_REAL:%.*]] = load float, ptr 
[[B_REALP]], align 4 +// FULL-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// FULL-NEXT: [[CALL:%.*]] = call <2 x float> @__divsc3(float noundef [[A_REAL]], float noundef [[A_IMAG]], float noundef [[B_REAL]], float noundef [[B_IMAG]]) #[[ATTR2:[0-9]+]] +// FULL-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// FULL-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// FULL-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// FULL-NEXT: store float [[COERCE_REAL]], ptr [[RETVAL_REALP]], align 4 +// FULL-NEXT: store float [[COERCE_IMAG]], ptr [[RETVAL_IMAGP]], align 4 +// FULL-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// FULL-NEXT: ret <2 x float> [[TMP0]] +// +// BASIC-LABEL: define dso_local <2 x float> @divf( +// BASIC-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// BASIC-NEXT: entry: +// BASIC-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// BASIC-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// BASIC-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// BASIC-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// BASIC-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// BASIC-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// BASIC-NEXT: [[A_IMAGP:%.*]] 
= getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// BASIC-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// BASIC-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// BASIC-NEXT: [[TMP0:%.*]] = fmul float [[A_REAL]], [[B_REAL]] +// BASIC-NEXT: [[TMP1:%.*]] = fmul float [[A_IMAG]], [[B_IMAG]] +// BASIC-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]] +// BASIC-NEXT: [[TMP3:%.*]] = fmul float [[B_REAL]], [[B_REAL]] +// BASIC-NEXT: [[TMP4:%.*]] = fmul float [[B_IMAG]], [[B_IMAG]] +// BASIC-NEXT: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]] +// BASIC-NEXT: [[TMP6:%.*]] = fmul float [[A_IMAG]], [[B_REAL]] +// BASIC-NEXT: [[TMP7:%.*]] = fmul float [[A_REAL]], [[B_IMAG]] +// BASIC-NEXT: [[TMP8:%.*]] = fsub float [[TMP6]], [[TMP7]] +// BASIC-NEXT: [[TMP9:%.*]] = fdiv float [[TMP2]], [[TMP5]] +// BASIC-NEXT: [[TMP10:%.*]] = fdiv float [[TMP8]], [[TMP5]] +// BASIC-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC-NEXT: store float [[TMP9]], ptr [[RETVAL_REALP]], align 4 +// BASIC-NEXT: store float [[TMP10]], ptr [[RETVAL_IMAGP]], align 4 +// BASIC-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// BASIC-NEXT: ret <2 x float> [[TMP11]] +// +// IMPRVD-LABEL: define dso_local <2 x float> @divf( +// IMPRVD-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: [[B:%.*]] = 
alloca { float, float }, align 4 +// IMPRVD-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// IMPRVD-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// IMPRVD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// IMPRVD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// IMPRVD-NEXT: [[TMP0:%.*]] = call float @llvm.fabs.f32(float [[B_REAL]]) +// IMPRVD-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[B_IMAG]]) +// IMPRVD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt float [[TMP0]], [[TMP1]] +// IMPRVD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD-NEXT: [[TMP2:%.*]] = fdiv float [[B_IMAG]], [[B_REAL]] +// IMPRVD-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], [[B_IMAG]] +// IMPRVD-NEXT: [[TMP4:%.*]] = fadd float [[B_REAL]], [[TMP3]] +// IMPRVD-NEXT: [[TMP5:%.*]] = fmul float [[A_IMAG]], [[TMP2]] +// IMPRVD-NEXT: [[TMP6:%.*]] = fadd float [[A_REAL]], [[TMP5]] +// IMPRVD-NEXT: [[TMP7:%.*]] = fdiv float [[TMP6]], [[TMP4]] +// IMPRVD-NEXT: [[TMP8:%.*]] = fmul float [[A_REAL]], [[TMP2]] +// IMPRVD-NEXT: [[TMP9:%.*]] = fsub float [[A_IMAG]], [[TMP8]] +// IMPRVD-NEXT: [[TMP10:%.*]] = fdiv float [[TMP9]], [[TMP4]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD: abs_rhsr_less_than_abs_rhsi: +// IMPRVD-NEXT: [[TMP11:%.*]] = fdiv float [[B_REAL]], [[B_IMAG]] +// IMPRVD-NEXT: 
[[TMP12:%.*]] = fmul float [[TMP11]], [[B_REAL]] +// IMPRVD-NEXT: [[TMP13:%.*]] = fadd float [[B_IMAG]], [[TMP12]] +// IMPRVD-NEXT: [[TMP14:%.*]] = fmul float [[A_REAL]], [[TMP11]] +// IMPRVD-NEXT: [[TMP15:%.*]] = fadd float [[TMP14]], [[A_IMAG]] +// IMPRVD-NEXT: [[TMP16:%.*]] = fdiv float [[TMP15]], [[TMP13]] +// IMPRVD-NEXT: [[TMP17:%.*]] = fmul float [[A_IMAG]], [[TMP11]] +// IMPRVD-NEXT: [[TMP18:%.*]] = fsub float [[TMP17]], [[A_REAL]] +// IMPRVD-NEXT: [[TMP19:%.*]] = fdiv float [[TMP18]], [[TMP13]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD: complex_div: +// IMPRVD-NEXT: [[TMP20:%.*]] = phi float [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[TMP21:%.*]] = phi float [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD-NEXT: store float [[TMP20]], ptr [[RETVAL_REALP]], align 4 +// IMPRVD-NEXT: store float [[TMP21]], ptr [[RETVAL_IMAGP]], align 4 +// IMPRVD-NEXT: [[TMP22:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// IMPRVD-NEXT: ret <2 x float> [[TMP22]] +// +// PRMTD-LABEL: define dso_local <2 x float> @divf( +// PRMTD-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// PRMTD-NEXT: entry: +// PRMTD-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// PRMTD-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// PRMTD-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// PRMTD-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// PRMTD-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// PRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: [[A_REAL:%.*]] = load 
float, ptr [[A_REALP]], align 4 +// PRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// PRMTD-NEXT: [[EXT:%.*]] = fpext float [[A_REAL]] to double +// PRMTD-NEXT: [[EXT1:%.*]] = fpext float [[A_IMAG]] to double +// PRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// PRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// PRMTD-NEXT: [[EXT2:%.*]] = fpext float [[B_REAL]] to double +// PRMTD-NEXT: [[EXT3:%.*]] = fpext float [[B_IMAG]] to double +// PRMTD-NEXT: [[TMP0:%.*]] = fmul double [[EXT]], [[EXT2]] +// PRMTD-NEXT: [[TMP1:%.*]] = fmul double [[EXT1]], [[EXT3]] +// PRMTD-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]] +// PRMTD-NEXT: [[TMP3:%.*]] = fmul double [[EXT2]], [[EXT2]] +// PRMTD-NEXT: [[TMP4:%.*]] = fmul double [[EXT3]], [[EXT3]] +// PRMTD-NEXT: [[TMP5:%.*]] = fadd double [[TMP3]], [[TMP4]] +// PRMTD-NEXT: [[TMP6:%.*]] = fmul double [[EXT1]], [[EXT2]] +// PRMTD-NEXT: [[TMP7:%.*]] = fmul double [[EXT]], [[EXT3]] +// PRMTD-NEXT: [[TMP8:%.*]] = fsub double [[TMP6]], [[TMP7]] +// PRMTD-NEXT: [[TMP9:%.*]] = fdiv double [[TMP2]], [[TMP5]] +// PRMTD-NEXT: [[TMP10:%.*]] = fdiv double [[TMP8]], [[TMP5]] +// PRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc double [[TMP9]] to float +// PRMTD-NEXT: [[UNPROMOTION4:%.*]] = fptrunc double [[TMP10]] to float +// PRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 4 +// PRMTD-NEXT: store float [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 4 +// 
PRMTD-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// PRMTD-NEXT: ret <2 x float> [[TMP11]] +// +// X86WINPRMTD-LABEL: define dso_local i64 @divf( +// X86WINPRMTD-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// X86WINPRMTD-NEXT: entry: +// X86WINPRMTD-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD-NEXT: store i64 [[A_COERCE]], ptr [[A]], align 4 +// X86WINPRMTD-NEXT: store i64 [[B_COERCE]], ptr [[B]], align 4 +// X86WINPRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// X86WINPRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// X86WINPRMTD-NEXT: [[EXT:%.*]] = fpext float [[A_REAL]] to double +// X86WINPRMTD-NEXT: [[EXT1:%.*]] = fpext float [[A_IMAG]] to double +// X86WINPRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// X86WINPRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// X86WINPRMTD-NEXT: [[EXT2:%.*]] = fpext float [[B_REAL]] to double +// X86WINPRMTD-NEXT: [[EXT3:%.*]] = fpext float [[B_IMAG]] to double +// X86WINPRMTD-NEXT: [[TMP0:%.*]] = fmul double [[EXT]], [[EXT2]] +// X86WINPRMTD-NEXT: [[TMP1:%.*]] = fmul double [[EXT1]], [[EXT3]] +// X86WINPRMTD-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]] +// X86WINPRMTD-NEXT: [[TMP3:%.*]] = fmul double [[EXT2]], [[EXT2]] +// X86WINPRMTD-NEXT: [[TMP4:%.*]] = fmul double [[EXT3]], [[EXT3]] +// X86WINPRMTD-NEXT: 
[[TMP5:%.*]] = fadd double [[TMP3]], [[TMP4]] +// X86WINPRMTD-NEXT: [[TMP6:%.*]] = fmul double [[EXT1]], [[EXT2]] +// X86WINPRMTD-NEXT: [[TMP7:%.*]] = fmul double [[EXT]], [[EXT3]] +// X86WINPRMTD-NEXT: [[TMP8:%.*]] = fsub double [[TMP6]], [[TMP7]] +// X86WINPRMTD-NEXT: [[TMP9:%.*]] = fdiv double [[TMP2]], [[TMP5]] +// X86WINPRMTD-NEXT: [[TMP10:%.*]] = fdiv double [[TMP8]], [[TMP5]] +// X86WINPRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc double [[TMP9]] to float +// X86WINPRMTD-NEXT: [[UNPROMOTION4:%.*]] = fptrunc double [[TMP10]] to float +// X86WINPRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 4 +// X86WINPRMTD-NEXT: store float [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 4 +// X86WINPRMTD-NEXT: [[TMP11:%.*]] = load i64, ptr [[RETVAL]], align 4 +// X86WINPRMTD-NEXT: ret i64 [[TMP11]] +// +// AVRFP32-LABEL: define dso_local { float, float } @divf( +// AVRFP32-SAME: float noundef [[A_COERCE0:%.*]], float noundef [[A_COERCE1:%.*]], float noundef [[B_COERCE0:%.*]], float noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0:[0-9]+]] { +// AVRFP32-NEXT: entry: +// AVRFP32-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[A:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[B:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[TMP0:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[A_COERCE0]], ptr [[TMP0]], align 1 +// AVRFP32-NEXT: [[TMP1:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[A_COERCE1]], ptr [[TMP1]], align 1 +// AVRFP32-NEXT: [[TMP2:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[B_COERCE0]], ptr [[TMP2]], 
align 1 +// AVRFP32-NEXT: [[TMP3:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[B_COERCE1]], ptr [[TMP3]], align 1 +// AVRFP32-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 1 +// AVRFP32-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 1 +// AVRFP32-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 1 +// AVRFP32-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP4:%.*]] = call addrspace(1) float @llvm.fabs.f32(float [[B_REAL]]) +// AVRFP32-NEXT: [[TMP5:%.*]] = call addrspace(1) float @llvm.fabs.f32(float [[B_IMAG]]) +// AVRFP32-NEXT: [[ABS_CMP:%.*]] = fcmp ugt float [[TMP4]], [[TMP5]] +// AVRFP32-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// AVRFP32: abs_rhsr_greater_or_equal_abs_rhsi: +// AVRFP32-NEXT: [[TMP6:%.*]] = fdiv float [[B_IMAG]], [[B_REAL]] +// AVRFP32-NEXT: [[TMP7:%.*]] = fmul float [[TMP6]], [[B_IMAG]] +// AVRFP32-NEXT: [[TMP8:%.*]] = fadd float [[B_REAL]], [[TMP7]] +// AVRFP32-NEXT: [[TMP9:%.*]] = fmul float [[A_IMAG]], [[TMP6]] +// AVRFP32-NEXT: [[TMP10:%.*]] = fadd float [[A_REAL]], [[TMP9]] +// AVRFP32-NEXT: [[TMP11:%.*]] = fdiv float [[TMP10]], [[TMP8]] +// AVRFP32-NEXT: [[TMP12:%.*]] = fmul float [[A_REAL]], [[TMP6]] +// AVRFP32-NEXT: [[TMP13:%.*]] = fsub float [[A_IMAG]], [[TMP12]] +// AVRFP32-NEXT: [[TMP14:%.*]] = fdiv float [[TMP13]], [[TMP8]] +// AVRFP32-NEXT: br label [[COMPLEX_DIV:%.*]] +// AVRFP32: abs_rhsr_less_than_abs_rhsi: +// AVRFP32-NEXT: 
[[TMP15:%.*]] = fdiv float [[B_REAL]], [[B_IMAG]] +// AVRFP32-NEXT: [[TMP16:%.*]] = fmul float [[TMP15]], [[B_REAL]] +// AVRFP32-NEXT: [[TMP17:%.*]] = fadd float [[B_IMAG]], [[TMP16]] +// AVRFP32-NEXT: [[TMP18:%.*]] = fmul float [[A_REAL]], [[TMP15]] +// AVRFP32-NEXT: [[TMP19:%.*]] = fadd float [[TMP18]], [[A_IMAG]] +// AVRFP32-NEXT: [[TMP20:%.*]] = fdiv float [[TMP19]], [[TMP17]] +// AVRFP32-NEXT: [[TMP21:%.*]] = fmul float [[A_IMAG]], [[TMP15]] +// AVRFP32-NEXT: [[TMP22:%.*]] = fsub float [[TMP21]], [[A_REAL]] +// AVRFP32-NEXT: [[TMP23:%.*]] = fdiv float [[TMP22]], [[TMP17]] +// AVRFP32-NEXT: br label [[COMPLEX_DIV]] +// AVRFP32: complex_div: +// AVRFP32-NEXT: [[TMP24:%.*]] = phi float [ [[TMP11]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP20]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP32-NEXT: [[TMP25:%.*]] = phi float [ [[TMP14]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP23]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP32-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// AVRFP32-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[TMP24]], ptr [[RETVAL_REALP]], align 1 +// AVRFP32-NEXT: store float [[TMP25]], ptr [[RETVAL_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP26:%.*]] = load { float, float }, ptr [[RETVAL]], align 1 +// AVRFP32-NEXT: ret { float, float } [[TMP26]] +// +// AVRFP64-LABEL: define dso_local { float, float } @divf( +// AVRFP64-SAME: float noundef [[A_COERCE0:%.*]], float noundef [[A_COERCE1:%.*]], float noundef [[B_COERCE0:%.*]], float noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0:[0-9]+]] { +// AVRFP64-NEXT: entry: +// AVRFP64-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 1 +// AVRFP64-NEXT: [[A:%.*]] = alloca { float, float }, align 1 +// AVRFP64-NEXT: [[B:%.*]] = alloca { float, float }, align 1 +// AVRFP64-NEXT: [[TMP0:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, 
i32 0 +// AVRFP64-NEXT: store float [[A_COERCE0]], ptr [[TMP0]], align 1 +// AVRFP64-NEXT: [[TMP1:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: store float [[A_COERCE1]], ptr [[TMP1]], align 1 +// AVRFP64-NEXT: [[TMP2:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: store float [[B_COERCE0]], ptr [[TMP2]], align 1 +// AVRFP64-NEXT: [[TMP3:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: store float [[B_COERCE1]], ptr [[TMP3]], align 1 +// AVRFP64-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 1 +// AVRFP64-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 1 +// AVRFP64-NEXT: [[EXT:%.*]] = fpext float [[A_REAL]] to double +// AVRFP64-NEXT: [[EXT1:%.*]] = fpext float [[A_IMAG]] to double +// AVRFP64-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 1 +// AVRFP64-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 1 +// AVRFP64-NEXT: [[EXT2:%.*]] = fpext float [[B_REAL]] to double +// AVRFP64-NEXT: [[EXT3:%.*]] = fpext float [[B_IMAG]] to double +// AVRFP64-NEXT: [[TMP4:%.*]] = fmul double [[EXT]], [[EXT2]] +// AVRFP64-NEXT: [[TMP5:%.*]] = fmul double [[EXT1]], [[EXT3]] +// AVRFP64-NEXT: [[TMP6:%.*]] = fadd double [[TMP4]], [[TMP5]] +// AVRFP64-NEXT: [[TMP7:%.*]] = fmul double [[EXT2]], [[EXT2]] +// AVRFP64-NEXT: [[TMP8:%.*]] = fmul double [[EXT3]], [[EXT3]] +// AVRFP64-NEXT: [[TMP9:%.*]] = fadd double [[TMP7]], [[TMP8]] +// AVRFP64-NEXT: [[TMP10:%.*]] = fmul double [[EXT1]], [[EXT2]] +// AVRFP64-NEXT: 
[[TMP11:%.*]] = fmul double [[EXT]], [[EXT3]] +// AVRFP64-NEXT: [[TMP12:%.*]] = fsub double [[TMP10]], [[TMP11]] +// AVRFP64-NEXT: [[TMP13:%.*]] = fdiv double [[TMP6]], [[TMP9]] +// AVRFP64-NEXT: [[TMP14:%.*]] = fdiv double [[TMP12]], [[TMP9]] +// AVRFP64-NEXT: [[UNPROMOTION:%.*]] = fptrunc double [[TMP13]] to float +// AVRFP64-NEXT: [[UNPROMOTION4:%.*]] = fptrunc double [[TMP14]] to float +// AVRFP64-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// AVRFP64-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// AVRFP64-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 1 +// AVRFP64-NEXT: store float [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 1 +// AVRFP64-NEXT: [[TMP15:%.*]] = load { float, float }, ptr [[RETVAL]], align 1 +// AVRFP64-NEXT: ret { float, float } [[TMP15]] +// +// BASIC_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @divf( +// BASIC_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x float> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// BASIC_FAST-NEXT: entry: +// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// BASIC_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// BASIC_FAST-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// BASIC_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// BASIC_FAST-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// BASIC_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// BASIC_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// BASIC_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: 
[[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// BASIC_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// BASIC_FAST-NEXT: [[TMP0:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[B_REAL]] +// BASIC_FAST-NEXT: [[TMP1:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[TMP2:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP0]], [[TMP1]] +// BASIC_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[B_REAL]], [[B_REAL]] +// BASIC_FAST-NEXT: [[TMP4:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[B_IMAG]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[TMP5:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP3]], [[TMP4]] +// BASIC_FAST-NEXT: [[TMP6:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[B_REAL]] +// BASIC_FAST-NEXT: [[TMP7:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[TMP8:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[TMP6]], [[TMP7]] +// BASIC_FAST-NEXT: [[TMP9:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP2]], [[TMP5]] +// BASIC_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP8]], [[TMP5]] +// BASIC_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC_FAST-NEXT: store float [[TMP9]], ptr [[RETVAL_REALP]], align 4 +// BASIC_FAST-NEXT: store float [[TMP10]], ptr [[RETVAL_IMAGP]], align 4 +// BASIC_FAST-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// BASIC_FAST-NEXT: ret <2 x float> [[TMP11]] +// +// FULL_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @divf( +// FULL_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x float> 
noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// FULL_FAST-NEXT: entry: +// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// FULL_FAST-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// FULL_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// FULL_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// FULL_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// FULL_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) <2 x float> @__divsc3(float noundef nofpclass(nan inf) [[A_REAL]], float noundef nofpclass(nan inf) [[A_IMAG]], float noundef nofpclass(nan inf) [[B_REAL]], float noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2:[0-9]+]] +// FULL_FAST-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// FULL_FAST-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL_FAST-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL_FAST-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL_FAST-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// 
FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// FULL_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// FULL_FAST-NEXT: store float [[COERCE_REAL]], ptr [[RETVAL_REALP]], align 4 +// FULL_FAST-NEXT: store float [[COERCE_IMAG]], ptr [[RETVAL_IMAGP]], align 4 +// FULL_FAST-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// FULL_FAST-NEXT: ret <2 x float> [[TMP0]] +// +// IMPRVD_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @divf( +// IMPRVD_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x float> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// IMPRVD_FAST-NEXT: entry: +// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// IMPRVD_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// IMPRVD_FAST-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// IMPRVD_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// IMPRVD_FAST-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// IMPRVD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// IMPRVD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// IMPRVD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// IMPRVD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float [[B_REAL]]) +// IMPRVD_FAST-NEXT: [[TMP1:%.*]] = call 
reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float [[B_IMAG]]) +// IMPRVD_FAST-NEXT: [[ABS_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt float [[TMP0]], [[TMP1]] +// IMPRVD_FAST-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD_FAST: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP2:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[B_IMAG]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[TMP2]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP4:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[B_REAL]], [[TMP3]] +// IMPRVD_FAST-NEXT: [[TMP5:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[TMP2]] +// IMPRVD_FAST-NEXT: [[TMP6:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[TMP5]] +// IMPRVD_FAST-NEXT: [[TMP7:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP6]], [[TMP4]] +// IMPRVD_FAST-NEXT: [[TMP8:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[TMP2]] +// IMPRVD_FAST-NEXT: [[TMP9:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[TMP8]] +// IMPRVD_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP9]], [[TMP4]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD_FAST: abs_rhsr_less_than_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP11:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[B_REAL]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[TMP11]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[TMP13:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[B_IMAG]], [[TMP12]] +// IMPRVD_FAST-NEXT: [[TMP14:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[TMP11]] +// IMPRVD_FAST-NEXT: [[TMP15:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP14]], [[A_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP16:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP15]], [[TMP13]] +// IMPRVD_FAST-NEXT: 
[[TMP17:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[TMP11]] +// IMPRVD_FAST-NEXT: [[TMP18:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[TMP17]], [[A_REAL]] +// IMPRVD_FAST-NEXT: [[TMP19:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP18]], [[TMP13]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD_FAST: complex_div: +// IMPRVD_FAST-NEXT: [[TMP20:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[TMP21:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store float [[TMP20]], ptr [[RETVAL_REALP]], align 4 +// IMPRVD_FAST-NEXT: store float [[TMP21]], ptr [[RETVAL_IMAGP]], align 4 +// IMPRVD_FAST-NEXT: [[TMP22:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// IMPRVD_FAST-NEXT: ret <2 x float> [[TMP22]] +// +// PRMTD_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @divf( +// PRMTD_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x float> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// PRMTD_FAST-NEXT: entry: +// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// PRMTD_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// PRMTD_FAST-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// PRMTD_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// PRMTD_FAST-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// PRMTD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr 
[[A_REALP]], align 4 +// PRMTD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// PRMTD_FAST-NEXT: [[EXT:%.*]] = fpext float [[A_REAL]] to double +// PRMTD_FAST-NEXT: [[EXT1:%.*]] = fpext float [[A_IMAG]] to double +// PRMTD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// PRMTD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// PRMTD_FAST-NEXT: [[EXT2:%.*]] = fpext float [[B_REAL]] to double +// PRMTD_FAST-NEXT: [[EXT3:%.*]] = fpext float [[B_IMAG]] to double +// PRMTD_FAST-NEXT: [[TMP0:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT]], [[EXT2]] +// PRMTD_FAST-NEXT: [[TMP1:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT1]], [[EXT3]] +// PRMTD_FAST-NEXT: [[TMP2:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[TMP0]], [[TMP1]] +// PRMTD_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT2]], [[EXT2]] +// PRMTD_FAST-NEXT: [[TMP4:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT3]], [[EXT3]] +// PRMTD_FAST-NEXT: [[TMP5:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[TMP3]], [[TMP4]] +// PRMTD_FAST-NEXT: [[TMP6:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT1]], [[EXT2]] +// PRMTD_FAST-NEXT: [[TMP7:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT]], [[EXT3]] +// PRMTD_FAST-NEXT: [[TMP8:%.*]] = fsub reassoc nnan ninf nsz arcp afn double [[TMP6]], [[TMP7]] +// PRMTD_FAST-NEXT: [[TMP9:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[TMP2]], [[TMP5]] +// PRMTD_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[TMP8]], [[TMP5]] +// PRMTD_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc double [[TMP9]] to float +// 
PRMTD_FAST-NEXT: [[UNPROMOTION4:%.*]] = fptrunc double [[TMP10]] to float +// PRMTD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 4 +// PRMTD_FAST-NEXT: store float [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 4 +// PRMTD_FAST-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// PRMTD_FAST-NEXT: ret <2 x float> [[TMP11]] +// _Complex float divf(_Complex float a, _Complex float b) { - // LABEL: define {{.*}} @divf( - // FULL: call {{.*}} @__divsc3 - // FULL_FAST: call {{.*}} @__divsc3 - // - // BASIC: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fadd{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fadd{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fsub{{.*}}float - // BASIC-NEXT: fdiv{{.*}}float - // BASIC-NEXT: fdiv{{.*}}float - // - // IMPRVD: call{{.*}}float @llvm.fabs.f32(float {{.*}}) - // IMPRVD-NEXT: call{{.*}}float @llvm.fabs.f32(float {{.*}}) - // IMPRVD-NEXT: fcmp{{.*}}ugt float {{.*}}, {{.*}} - // IMPRVD-NEXT: br i1 {{.*}}, label - // IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: - // IMPRVD-NEXT: fdiv{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fadd{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fadd{{.*}}float - // IMPRVD-NEXT: fdiv{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fsub{{.*}}float - // IMPRVD-NEXT: fdiv{{.*}}float - // IMPRVD-NEXT: br label - // IMPRVD: abs_rhsr_less_than_abs_rhsi: - // IMPRVD-NEXT: fdiv{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fadd{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fadd{{.*}}float - // IMPRVD-NEXT: fdiv{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - 
// IMPRVD-NEXT: fsub{{.*}}float - // IMPRVD-NEXT: fdiv{{.*}}float - // - // PRMTD: load float, ptr {{.*}} - // PRMTD: fpext float {{.*}} to double - // PRMTD-NEXT: fpext float {{.*}} to double - // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load float, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load float, ptr {{.*}} - // PRMTD-NEXT: fpext float {{.*}} to double - // PRMTD-NEXT: fpext float {{.*}} to double - // PRMTD-NEXT: fmul{{.*}}double - // PRMTD-NEXT: fmul{{.*}}double - // PRMTD-NEXT: fadd{{.*}}double - // PRMTD-NEXT: fmul{{.*}}double - // PRMTD-NEXT: fmul{{.*}}double - // PRMTD-NEXT: fadd{{.*}}double - // PRMTD-NEXT: fmul{{.*}}double - // PRMTD-NEXT: fmul{{.*}}double - // PRMTD-NEXT: fsub{{.*}}double - // PRMTD-NEXT: fdiv{{.*}}double - // PRMTD-NEXT: fdiv{{.*}}double - // PRMTD-NEXT: fptrunc double {{.*}} to float - // PRMTD-NEXT: fptrunc double {{.*}} to float - return a / b; } +// FULL-LABEL: define dso_local <2 x float> @mulf( +// FULL-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// FULL-NEXT: entry: +// FULL-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// FULL-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// FULL-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// FULL-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// FULL-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], 
i32 0, i32 0 +// FULL-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// FULL-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// FULL-NEXT: [[MUL_AC:%.*]] = fmul float [[A_REAL]], [[B_REAL]] +// FULL-NEXT: [[MUL_BD:%.*]] = fmul float [[A_IMAG]], [[B_IMAG]] +// FULL-NEXT: [[MUL_AD:%.*]] = fmul float [[A_REAL]], [[B_IMAG]] +// FULL-NEXT: [[MUL_BC:%.*]] = fmul float [[A_IMAG]], [[B_REAL]] +// FULL-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// FULL-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// FULL-NEXT: [[ISNAN_CMP:%.*]] = fcmp uno float [[MUL_R]], [[MUL_R]] +// FULL-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] +// FULL: complex_mul_imag_nan: +// FULL-NEXT: [[ISNAN_CMP1:%.*]] = fcmp uno float [[MUL_I]], [[MUL_I]] +// FULL-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// FULL: complex_mul_libcall: +// FULL-NEXT: [[CALL:%.*]] = call <2 x float> @__mulsc3(float noundef [[A_REAL]], float noundef [[A_IMAG]], float noundef [[B_REAL]], float noundef [[B_IMAG]]) #[[ATTR2]] +// FULL-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// FULL-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL-NEXT: br label [[COMPLEX_MUL_CONT]] +// FULL: complex_mul_cont: +// FULL-NEXT: [[REAL_MUL_PHI:%.*]] = phi float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL-NEXT: [[IMAG_MUL_PHI:%.*]] = phi float [ 
[[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// FULL-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// FULL-NEXT: store float [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 4 +// FULL-NEXT: store float [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 4 +// FULL-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// FULL-NEXT: ret <2 x float> [[TMP0]] +// +// BASIC-LABEL: define dso_local <2 x float> @mulf( +// BASIC-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// BASIC-NEXT: entry: +// BASIC-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// BASIC-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// BASIC-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// BASIC-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// BASIC-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// BASIC-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// BASIC-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// BASIC-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// BASIC-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// BASIC-NEXT: [[MUL_AC:%.*]] = fmul float [[A_REAL]], [[B_REAL]] +// BASIC-NEXT: [[MUL_BD:%.*]] = fmul float [[A_IMAG]], [[B_IMAG]] +// BASIC-NEXT: [[MUL_AD:%.*]] = fmul float [[A_REAL]], [[B_IMAG]] +// BASIC-NEXT: 
[[MUL_BC:%.*]] = fmul float [[A_IMAG]], [[B_REAL]] +// BASIC-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// BASIC-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// BASIC-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 4 +// BASIC-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 4 +// BASIC-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// BASIC-NEXT: ret <2 x float> [[TMP0]] +// +// IMPRVD-LABEL: define dso_local <2 x float> @mulf( +// IMPRVD-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// IMPRVD-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// IMPRVD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// IMPRVD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// IMPRVD-NEXT: [[MUL_AC:%.*]] = fmul float [[A_REAL]], [[B_REAL]] +// IMPRVD-NEXT: [[MUL_BD:%.*]] = fmul float [[A_IMAG]], [[B_IMAG]] +// 
IMPRVD-NEXT: [[MUL_AD:%.*]] = fmul float [[A_REAL]], [[B_IMAG]] +// IMPRVD-NEXT: [[MUL_BC:%.*]] = fmul float [[A_IMAG]], [[B_REAL]] +// IMPRVD-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// IMPRVD-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// IMPRVD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 4 +// IMPRVD-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 4 +// IMPRVD-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// IMPRVD-NEXT: ret <2 x float> [[TMP0]] +// +// PRMTD-LABEL: define dso_local <2 x float> @mulf( +// PRMTD-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// PRMTD-NEXT: entry: +// PRMTD-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// PRMTD-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// PRMTD-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// PRMTD-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// PRMTD-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// PRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// PRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// PRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// PRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// PRMTD-NEXT: [[MUL_AC:%.*]] = fmul float [[A_REAL]], [[B_REAL]] +// 
PRMTD-NEXT: [[MUL_BD:%.*]] = fmul float [[A_IMAG]], [[B_IMAG]] +// PRMTD-NEXT: [[MUL_AD:%.*]] = fmul float [[A_REAL]], [[B_IMAG]] +// PRMTD-NEXT: [[MUL_BC:%.*]] = fmul float [[A_IMAG]], [[B_REAL]] +// PRMTD-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// PRMTD-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// PRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 4 +// PRMTD-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 4 +// PRMTD-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// PRMTD-NEXT: ret <2 x float> [[TMP0]] +// +// X86WINPRMTD-LABEL: define dso_local i64 @mulf( +// X86WINPRMTD-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// X86WINPRMTD-NEXT: entry: +// X86WINPRMTD-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD-NEXT: store i64 [[A_COERCE]], ptr [[A]], align 4 +// X86WINPRMTD-NEXT: store i64 [[B_COERCE]], ptr [[B]], align 4 +// X86WINPRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// X86WINPRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// X86WINPRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// X86WINPRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[B_IMAG:%.*]] 
= load float, ptr [[B_IMAGP]], align 4 +// X86WINPRMTD-NEXT: [[MUL_AC:%.*]] = fmul float [[A_REAL]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[MUL_BD:%.*]] = fmul float [[A_IMAG]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[MUL_AD:%.*]] = fmul float [[A_REAL]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[MUL_BC:%.*]] = fmul float [[A_IMAG]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// X86WINPRMTD-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// X86WINPRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 4 +// X86WINPRMTD-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 4 +// X86WINPRMTD-NEXT: [[TMP0:%.*]] = load i64, ptr [[RETVAL]], align 4 +// X86WINPRMTD-NEXT: ret i64 [[TMP0]] +// +// AVRFP32-LABEL: define dso_local { float, float } @mulf( +// AVRFP32-SAME: float noundef [[A_COERCE0:%.*]], float noundef [[A_COERCE1:%.*]], float noundef [[B_COERCE0:%.*]], float noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP32-NEXT: entry: +// AVRFP32-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[A:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[B:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[TMP0:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[A_COERCE0]], ptr [[TMP0]], align 1 +// AVRFP32-NEXT: [[TMP1:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[A_COERCE1]], ptr [[TMP1]], align 1 +// AVRFP32-NEXT: [[TMP2:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[B_COERCE0]], ptr [[TMP2]], align 1 +// AVRFP32-NEXT: [[TMP3:%.*]] = getelementptr inbounds { float, float }, ptr 
[[B]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[B_COERCE1]], ptr [[TMP3]], align 1 +// AVRFP32-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 1 +// AVRFP32-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 1 +// AVRFP32-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 1 +// AVRFP32-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 1 +// AVRFP32-NEXT: [[MUL_AC:%.*]] = fmul float [[A_REAL]], [[B_REAL]] +// AVRFP32-NEXT: [[MUL_BD:%.*]] = fmul float [[A_IMAG]], [[B_IMAG]] +// AVRFP32-NEXT: [[MUL_AD:%.*]] = fmul float [[A_REAL]], [[B_IMAG]] +// AVRFP32-NEXT: [[MUL_BC:%.*]] = fmul float [[A_IMAG]], [[B_REAL]] +// AVRFP32-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// AVRFP32-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// AVRFP32-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// AVRFP32-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 1 +// AVRFP32-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP4:%.*]] = load { float, float }, ptr [[RETVAL]], align 1 +// AVRFP32-NEXT: ret { float, float } [[TMP4]] +// +// AVRFP64-LABEL: define dso_local { float, float } @mulf( +// AVRFP64-SAME: float noundef [[A_COERCE0:%.*]], float noundef [[A_COERCE1:%.*]], float noundef [[B_COERCE0:%.*]], float noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP64-NEXT: entry: +// AVRFP64-NEXT: [[RETVAL:%.*]] = alloca { float, 
float }, align 1 +// AVRFP64-NEXT: [[A:%.*]] = alloca { float, float }, align 1 +// AVRFP64-NEXT: [[B:%.*]] = alloca { float, float }, align 1 +// AVRFP64-NEXT: [[TMP0:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: store float [[A_COERCE0]], ptr [[TMP0]], align 1 +// AVRFP64-NEXT: [[TMP1:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: store float [[A_COERCE1]], ptr [[TMP1]], align 1 +// AVRFP64-NEXT: [[TMP2:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: store float [[B_COERCE0]], ptr [[TMP2]], align 1 +// AVRFP64-NEXT: [[TMP3:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: store float [[B_COERCE1]], ptr [[TMP3]], align 1 +// AVRFP64-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 1 +// AVRFP64-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 1 +// AVRFP64-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 1 +// AVRFP64-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 1 +// AVRFP64-NEXT: [[MUL_AC:%.*]] = fmul float [[A_REAL]], [[B_REAL]] +// AVRFP64-NEXT: [[MUL_BD:%.*]] = fmul float [[A_IMAG]], [[B_IMAG]] +// AVRFP64-NEXT: [[MUL_AD:%.*]] = fmul float [[A_REAL]], [[B_IMAG]] +// AVRFP64-NEXT: [[MUL_BC:%.*]] = fmul float [[A_IMAG]], [[B_REAL]] +// AVRFP64-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// AVRFP64-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// AVRFP64-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr 
[[RETVAL]], i32 0, i32 0 +// AVRFP64-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// AVRFP64-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 1 +// AVRFP64-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 1 +// AVRFP64-NEXT: [[TMP4:%.*]] = load { float, float }, ptr [[RETVAL]], align 1 +// AVRFP64-NEXT: ret { float, float } [[TMP4]] +// +// BASIC_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @mulf( +// BASIC_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x float> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0]] { +// BASIC_FAST-NEXT: entry: +// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// BASIC_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// BASIC_FAST-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// BASIC_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// BASIC_FAST-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// BASIC_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// BASIC_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// BASIC_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// BASIC_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// BASIC_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[B_REAL]] +// BASIC_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz 
arcp afn float [[A_REAL]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[B_REAL]] +// BASIC_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[MUL_AC]], [[MUL_BD]] +// BASIC_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[MUL_AD]], [[MUL_BC]] +// BASIC_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC_FAST-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 4 +// BASIC_FAST-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 4 +// BASIC_FAST-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// BASIC_FAST-NEXT: ret <2 x float> [[TMP0]] +// +// FULL_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @mulf( +// FULL_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x float> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0]] { +// FULL_FAST-NEXT: entry: +// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// FULL_FAST-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// FULL_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// FULL_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// FULL_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// FULL_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: 
[[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// FULL_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[B_REAL]] +// FULL_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[B_IMAG]] +// FULL_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[B_IMAG]] +// FULL_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[B_REAL]] +// FULL_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[MUL_AC]], [[MUL_BD]] +// FULL_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[MUL_AD]], [[MUL_BC]] +// FULL_FAST-NEXT: [[ISNAN_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno float [[MUL_R]], [[MUL_R]] +// FULL_FAST-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] +// FULL_FAST: complex_mul_imag_nan: +// FULL_FAST-NEXT: [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno float [[MUL_I]], [[MUL_I]] +// FULL_FAST-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// FULL_FAST: complex_mul_libcall: +// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) <2 x float> @__mulsc3(float noundef nofpclass(nan inf) [[A_REAL]], float noundef nofpclass(nan inf) [[A_IMAG]], float noundef nofpclass(nan inf) [[B_REAL]], float noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] +// FULL_FAST-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// FULL_FAST-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL_FAST-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL_FAST-NEXT: [[COERCE_IMAGP:%.*]] = 
getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL_FAST-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL_FAST-NEXT: br label [[COMPLEX_MUL_CONT]] +// FULL_FAST: complex_mul_cont: +// FULL_FAST-NEXT: [[REAL_MUL_PHI:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL_FAST-NEXT: [[IMAG_MUL_PHI:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// FULL_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// FULL_FAST-NEXT: store float [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 4 +// FULL_FAST-NEXT: store float [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 4 +// FULL_FAST-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// FULL_FAST-NEXT: ret <2 x float> [[TMP0]] +// +// IMPRVD_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @mulf( +// IMPRVD_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x float> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0]] { +// IMPRVD_FAST-NEXT: entry: +// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// IMPRVD_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// IMPRVD_FAST-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// IMPRVD_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// IMPRVD_FAST-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// IMPRVD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// IMPRVD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { 
float, float }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// IMPRVD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// IMPRVD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// IMPRVD_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[MUL_AC]], [[MUL_BD]] +// IMPRVD_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[MUL_AD]], [[MUL_BC]] +// IMPRVD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 4 +// IMPRVD_FAST-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 4 +// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// IMPRVD_FAST-NEXT: ret <2 x float> [[TMP0]] +// +// PRMTD_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @mulf( +// PRMTD_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x float> noundef nofpclass(nan inf) [[B_COERCE:%.*]]) #[[ATTR0]] { +// PRMTD_FAST-NEXT: entry: +// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// PRMTD_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 
+// PRMTD_FAST-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// PRMTD_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// PRMTD_FAST-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// PRMTD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// PRMTD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// PRMTD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// PRMTD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// PRMTD_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[B_REAL]] +// PRMTD_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[B_IMAG]] +// PRMTD_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[B_IMAG]] +// PRMTD_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[B_REAL]] +// PRMTD_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[MUL_AC]], [[MUL_BD]] +// PRMTD_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[MUL_AD]], [[MUL_BC]] +// PRMTD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 4 +// PRMTD_FAST-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 4 +// PRMTD_FAST-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// 
PRMTD_FAST-NEXT: ret <2 x float> [[TMP0]] +// _Complex float mulf(_Complex float a, _Complex float b) { - // LABEL: define {{.*}} @mulf( - // FULL: call {{.*}} @__mulsc3 - // - // FULL_FAST: alloca { float, float } - // FULL_FAST-NEXT: alloca { float, float } - // FULL_FAST-NEXT: alloca { float, float } - // FULL_FAST: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // FULL_FAST-NEXT: load float, ptr {{.*}} - // FULL_FAST-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // FULL_FAST-NEXT: load float, ptr {{.*}} - // FULL_FAST-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // FULL_FAST-NEXT: load float, ptr {{.*}} - // FULL_FAST-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // FULL_FAST-NEXT: load float - // FULL_FAST-NEXT: fmul{{.*}}float - // FULL_FAST-NEXT: fmul{{.*}}float - // FULL_FAST-NEXT: fmul{{.*}}float - // FULL_FAST-NEXT: fmul{{.*}}float - // FULL_FAST-NEXT: fsub{{.*}}float - // FULL_FAST-NEXT: fadd{{.*}}float - - // BASIC: alloca { float, float } - // BASIC-NEXT: alloca { float, float } - // BASIC-NEXT: alloca { float, float } - // BASIC: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // BASIC-NEXT: load float, ptr {{.*}} - // BASIC-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // BASIC-NEXT: load float, ptr {{.*}} - // BASIC-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // BASIC-NEXT: load float, ptr {{.*}} - // BASIC-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // BASIC-NEXT: load float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fsub{{.*}}float - // BASIC-NEXT: fadd{{.*}}float - // - // IMPRVD: alloca { float, float } - // IMPRVD-NEXT: alloca { float, float } - // IMPRVD-NEXT: alloca { float, float } - // IMPRVD: getelementptr inbounds { float, 
float }, ptr {{.*}}, i32 0, i32 0 - // IMPRVD-NEXT: load float, ptr {{.*}} - // IMPRVD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // IMPRVD-NEXT: load float, ptr {{.*}} - // IMPRVD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // IMPRVD-NEXT: load float, ptr {{.*}} - // IMPRVD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // IMPRVD-NEXT: load float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fsub{{.*}}float - // IMPRVD-NEXT: fadd{{.*}}float - // - // PRMTD: alloca { float, float } - // PRMTD-NEXT: alloca { float, float } - // PRMTD-NEXT: alloca { float, float } - // PRMTD: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load float, ptr - // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load float, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load float, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { float, float }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fsub{{.*}}float - // PRMTD-NEXT: fadd{{.*}}float - return a * b; } +// FULL-LABEL: define dso_local { double, double } @divd( +// FULL-SAME: double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) #[[ATTR1:[0-9]+]] { +// FULL-NEXT: entry: +// FULL-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// FULL-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// FULL-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// FULL-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// 
FULL-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// FULL-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// FULL-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// FULL-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// FULL-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// FULL-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// FULL-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// FULL-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// FULL-NEXT: [[CALL:%.*]] = call { double, double } @__divdc3(double noundef [[A_REAL]], double noundef [[A_IMAG]], double noundef [[B_REAL]], double noundef [[B_IMAG]]) #[[ATTR2]] +// FULL-NEXT: [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0 +// FULL-NEXT: [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 1 +// FULL-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// FULL-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// FULL-NEXT: store double [[TMP4]], ptr [[RETVAL_REALP]], align 8 +// FULL-NEXT: store double [[TMP5]], ptr [[RETVAL_IMAGP]], align 8 +// FULL-NEXT: [[TMP6:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// 
FULL-NEXT: ret { double, double } [[TMP6]] +// +// BASIC-LABEL: define dso_local { double, double } @divd( +// BASIC-SAME: double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) #[[ATTR1:[0-9]+]] { +// BASIC-NEXT: entry: +// BASIC-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// BASIC-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// BASIC-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// BASIC-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// BASIC-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// BASIC-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// BASIC-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// BASIC-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// BASIC-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// BASIC-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// BASIC-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// BASIC-NEXT: [[TMP4:%.*]] = fmul double [[A_REAL]], [[B_REAL]] +// BASIC-NEXT: [[TMP5:%.*]] = fmul double [[A_IMAG]], [[B_IMAG]] +// BASIC-NEXT: 
[[TMP6:%.*]] = fadd double [[TMP4]], [[TMP5]] +// BASIC-NEXT: [[TMP7:%.*]] = fmul double [[B_REAL]], [[B_REAL]] +// BASIC-NEXT: [[TMP8:%.*]] = fmul double [[B_IMAG]], [[B_IMAG]] +// BASIC-NEXT: [[TMP9:%.*]] = fadd double [[TMP7]], [[TMP8]] +// BASIC-NEXT: [[TMP10:%.*]] = fmul double [[A_IMAG]], [[B_REAL]] +// BASIC-NEXT: [[TMP11:%.*]] = fmul double [[A_REAL]], [[B_IMAG]] +// BASIC-NEXT: [[TMP12:%.*]] = fsub double [[TMP10]], [[TMP11]] +// BASIC-NEXT: [[TMP13:%.*]] = fdiv double [[TMP6]], [[TMP9]] +// BASIC-NEXT: [[TMP14:%.*]] = fdiv double [[TMP12]], [[TMP9]] +// BASIC-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC-NEXT: store double [[TMP13]], ptr [[RETVAL_REALP]], align 8 +// BASIC-NEXT: store double [[TMP14]], ptr [[RETVAL_IMAGP]], align 8 +// BASIC-NEXT: [[TMP15:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// BASIC-NEXT: ret { double, double } [[TMP15]] +// +// IMPRVD-LABEL: define dso_local { double, double } @divd( +// IMPRVD-SAME: double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) #[[ATTR2:[0-9]+]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// IMPRVD-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// IMPRVD-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// IMPRVD-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// IMPRVD-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// IMPRVD-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: store double [[B_COERCE0]], ptr 
[[TMP2]], align 8 +// IMPRVD-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// IMPRVD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// IMPRVD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// IMPRVD-NEXT: [[TMP4:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) +// IMPRVD-NEXT: [[TMP5:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) +// IMPRVD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt double [[TMP4]], [[TMP5]] +// IMPRVD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD-NEXT: [[TMP6:%.*]] = fdiv double [[B_IMAG]], [[B_REAL]] +// IMPRVD-NEXT: [[TMP7:%.*]] = fmul double [[TMP6]], [[B_IMAG]] +// IMPRVD-NEXT: [[TMP8:%.*]] = fadd double [[B_REAL]], [[TMP7]] +// IMPRVD-NEXT: [[TMP9:%.*]] = fmul double [[A_IMAG]], [[TMP6]] +// IMPRVD-NEXT: [[TMP10:%.*]] = fadd double [[A_REAL]], [[TMP9]] +// IMPRVD-NEXT: [[TMP11:%.*]] = fdiv double [[TMP10]], [[TMP8]] +// IMPRVD-NEXT: [[TMP12:%.*]] = fmul double [[A_REAL]], [[TMP6]] +// IMPRVD-NEXT: [[TMP13:%.*]] = fsub double [[A_IMAG]], [[TMP12]] +// IMPRVD-NEXT: [[TMP14:%.*]] = fdiv double [[TMP13]], [[TMP8]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD: abs_rhsr_less_than_abs_rhsi: +// IMPRVD-NEXT: [[TMP15:%.*]] = fdiv 
double [[B_REAL]], [[B_IMAG]] +// IMPRVD-NEXT: [[TMP16:%.*]] = fmul double [[TMP15]], [[B_REAL]] +// IMPRVD-NEXT: [[TMP17:%.*]] = fadd double [[B_IMAG]], [[TMP16]] +// IMPRVD-NEXT: [[TMP18:%.*]] = fmul double [[A_REAL]], [[TMP15]] +// IMPRVD-NEXT: [[TMP19:%.*]] = fadd double [[TMP18]], [[A_IMAG]] +// IMPRVD-NEXT: [[TMP20:%.*]] = fdiv double [[TMP19]], [[TMP17]] +// IMPRVD-NEXT: [[TMP21:%.*]] = fmul double [[A_IMAG]], [[TMP15]] +// IMPRVD-NEXT: [[TMP22:%.*]] = fsub double [[TMP21]], [[A_REAL]] +// IMPRVD-NEXT: [[TMP23:%.*]] = fdiv double [[TMP22]], [[TMP17]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD: complex_div: +// IMPRVD-NEXT: [[TMP24:%.*]] = phi double [ [[TMP11]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP20]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[TMP25:%.*]] = phi double [ [[TMP14]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP23]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD-NEXT: store double [[TMP24]], ptr [[RETVAL_REALP]], align 8 +// IMPRVD-NEXT: store double [[TMP25]], ptr [[RETVAL_IMAGP]], align 8 +// IMPRVD-NEXT: [[TMP26:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// IMPRVD-NEXT: ret { double, double } [[TMP26]] +// +// PRMTD-LABEL: define dso_local { double, double } @divd( +// PRMTD-SAME: double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) #[[ATTR1:[0-9]+]] { +// PRMTD-NEXT: entry: +// PRMTD-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// PRMTD-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// PRMTD-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// PRMTD-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: store 
double [[A_COERCE0]], ptr [[TMP0]], align 8 +// PRMTD-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// PRMTD-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// PRMTD-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// PRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// PRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// PRMTD-NEXT: [[EXT:%.*]] = fpext double [[A_REAL]] to x86_fp80 +// PRMTD-NEXT: [[EXT1:%.*]] = fpext double [[A_IMAG]] to x86_fp80 +// PRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// PRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// PRMTD-NEXT: [[EXT2:%.*]] = fpext double [[B_REAL]] to x86_fp80 +// PRMTD-NEXT: [[EXT3:%.*]] = fpext double [[B_IMAG]] to x86_fp80 +// PRMTD-NEXT: [[TMP4:%.*]] = fmul x86_fp80 [[EXT]], [[EXT2]] +// PRMTD-NEXT: [[TMP5:%.*]] = fmul x86_fp80 [[EXT1]], [[EXT3]] +// PRMTD-NEXT: [[TMP6:%.*]] = fadd x86_fp80 [[TMP4]], [[TMP5]] +// PRMTD-NEXT: [[TMP7:%.*]] = fmul x86_fp80 [[EXT2]], [[EXT2]] +// PRMTD-NEXT: [[TMP8:%.*]] = fmul x86_fp80 [[EXT3]], [[EXT3]] +// PRMTD-NEXT: [[TMP9:%.*]] = fadd x86_fp80 [[TMP7]], [[TMP8]] +// PRMTD-NEXT: [[TMP10:%.*]] = fmul x86_fp80 [[EXT1]], [[EXT2]] +// PRMTD-NEXT: [[TMP11:%.*]] = fmul x86_fp80 [[EXT]], 
[[EXT3]] +// PRMTD-NEXT: [[TMP12:%.*]] = fsub x86_fp80 [[TMP10]], [[TMP11]] +// PRMTD-NEXT: [[TMP13:%.*]] = fdiv x86_fp80 [[TMP6]], [[TMP9]] +// PRMTD-NEXT: [[TMP14:%.*]] = fdiv x86_fp80 [[TMP12]], [[TMP9]] +// PRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc x86_fp80 [[TMP13]] to double +// PRMTD-NEXT: [[UNPROMOTION4:%.*]] = fptrunc x86_fp80 [[TMP14]] to double +// PRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD-NEXT: store double [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 8 +// PRMTD-NEXT: store double [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 8 +// PRMTD-NEXT: [[TMP15:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// PRMTD-NEXT: ret { double, double } [[TMP15]] +// +// X86WINPRMTD-LABEL: define dso_local void @divd( +// X86WINPRMTD-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] { +// X86WINPRMTD-NEXT: entry: +// X86WINPRMTD-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: [[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: [[A_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// X86WINPRMTD-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD-NEXT: store ptr [[A]], ptr [[A_INDIRECT_ADDR]], align 8 +// X86WINPRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// X86WINPRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], 
i32 0, i32 0 +// X86WINPRMTD-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) +// X86WINPRMTD-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) +// X86WINPRMTD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt double [[TMP0]], [[TMP1]] +// X86WINPRMTD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// X86WINPRMTD: abs_rhsr_greater_or_equal_abs_rhsi: +// X86WINPRMTD-NEXT: [[TMP2:%.*]] = fdiv double [[B_IMAG]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[TMP3:%.*]] = fmul double [[TMP2]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[TMP4:%.*]] = fadd double [[B_REAL]], [[TMP3]] +// X86WINPRMTD-NEXT: [[TMP5:%.*]] = fmul double [[A_IMAG]], [[TMP2]] +// X86WINPRMTD-NEXT: [[TMP6:%.*]] = fadd double [[A_REAL]], [[TMP5]] +// X86WINPRMTD-NEXT: [[TMP7:%.*]] = fdiv double [[TMP6]], [[TMP4]] +// X86WINPRMTD-NEXT: [[TMP8:%.*]] = fmul double [[A_REAL]], [[TMP2]] +// X86WINPRMTD-NEXT: [[TMP9:%.*]] = fsub double [[A_IMAG]], [[TMP8]] +// X86WINPRMTD-NEXT: [[TMP10:%.*]] = fdiv double [[TMP9]], [[TMP4]] +// X86WINPRMTD-NEXT: br label [[COMPLEX_DIV:%.*]] +// X86WINPRMTD: abs_rhsr_less_than_abs_rhsi: +// X86WINPRMTD-NEXT: [[TMP11:%.*]] = fdiv double [[B_REAL]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[TMP12:%.*]] = fmul double [[TMP11]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[TMP13:%.*]] = fadd double [[B_IMAG]], [[TMP12]] +// X86WINPRMTD-NEXT: [[TMP14:%.*]] = fmul double [[A_REAL]], [[TMP11]] +// X86WINPRMTD-NEXT: [[TMP15:%.*]] = fadd double [[TMP14]], [[A_IMAG]] +// X86WINPRMTD-NEXT: [[TMP16:%.*]] = fdiv double [[TMP15]], [[TMP13]] +// X86WINPRMTD-NEXT: [[TMP17:%.*]] = fmul double [[A_IMAG]], [[TMP11]] +// X86WINPRMTD-NEXT: [[TMP18:%.*]] = fsub double [[TMP17]], 
[[A_REAL]] +// X86WINPRMTD-NEXT: [[TMP19:%.*]] = fdiv double [[TMP18]], [[TMP13]] +// X86WINPRMTD-NEXT: br label [[COMPLEX_DIV]] +// X86WINPRMTD: complex_div: +// X86WINPRMTD-NEXT: [[TMP20:%.*]] = phi double [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD-NEXT: [[TMP21:%.*]] = phi double [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store double [[TMP20]], ptr [[AGG_RESULT_REALP]], align 8 +// X86WINPRMTD-NEXT: store double [[TMP21]], ptr [[AGG_RESULT_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 8 +// X86WINPRMTD-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 8 +// X86WINPRMTD-NEXT: ret void +// +// AVRFP32-LABEL: define dso_local { float, float } @divd( +// AVRFP32-SAME: float noundef [[A_COERCE0:%.*]], float noundef [[A_COERCE1:%.*]], float noundef [[B_COERCE0:%.*]], float noundef [[B_COERCE1:%.*]]) 
addrspace(1) #[[ATTR0]] { +// AVRFP32-NEXT: entry: +// AVRFP32-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// AVRFP32-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// AVRFP32-NEXT: [[TMP0:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[A_COERCE0]], ptr [[TMP0]], align 4 +// AVRFP32-NEXT: [[TMP1:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[A_COERCE1]], ptr [[TMP1]], align 4 +// AVRFP32-NEXT: [[TMP2:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[B_COERCE0]], ptr [[TMP2]], align 4 +// AVRFP32-NEXT: [[TMP3:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[B_COERCE1]], ptr [[TMP3]], align 4 +// AVRFP32-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// AVRFP32-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// AVRFP32-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// AVRFP32-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// AVRFP32-NEXT: [[TMP4:%.*]] = call addrspace(1) float @llvm.fabs.f32(float [[B_REAL]]) +// AVRFP32-NEXT: [[TMP5:%.*]] = call addrspace(1) float @llvm.fabs.f32(float [[B_IMAG]]) +// AVRFP32-NEXT: [[ABS_CMP:%.*]] = fcmp ugt float [[TMP4]], [[TMP5]] +// AVRFP32-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// AVRFP32: 
abs_rhsr_greater_or_equal_abs_rhsi: +// AVRFP32-NEXT: [[TMP6:%.*]] = fdiv float [[B_IMAG]], [[B_REAL]] +// AVRFP32-NEXT: [[TMP7:%.*]] = fmul float [[TMP6]], [[B_IMAG]] +// AVRFP32-NEXT: [[TMP8:%.*]] = fadd float [[B_REAL]], [[TMP7]] +// AVRFP32-NEXT: [[TMP9:%.*]] = fmul float [[A_IMAG]], [[TMP6]] +// AVRFP32-NEXT: [[TMP10:%.*]] = fadd float [[A_REAL]], [[TMP9]] +// AVRFP32-NEXT: [[TMP11:%.*]] = fdiv float [[TMP10]], [[TMP8]] +// AVRFP32-NEXT: [[TMP12:%.*]] = fmul float [[A_REAL]], [[TMP6]] +// AVRFP32-NEXT: [[TMP13:%.*]] = fsub float [[A_IMAG]], [[TMP12]] +// AVRFP32-NEXT: [[TMP14:%.*]] = fdiv float [[TMP13]], [[TMP8]] +// AVRFP32-NEXT: br label [[COMPLEX_DIV:%.*]] +// AVRFP32: abs_rhsr_less_than_abs_rhsi: +// AVRFP32-NEXT: [[TMP15:%.*]] = fdiv float [[B_REAL]], [[B_IMAG]] +// AVRFP32-NEXT: [[TMP16:%.*]] = fmul float [[TMP15]], [[B_REAL]] +// AVRFP32-NEXT: [[TMP17:%.*]] = fadd float [[B_IMAG]], [[TMP16]] +// AVRFP32-NEXT: [[TMP18:%.*]] = fmul float [[A_REAL]], [[TMP15]] +// AVRFP32-NEXT: [[TMP19:%.*]] = fadd float [[TMP18]], [[A_IMAG]] +// AVRFP32-NEXT: [[TMP20:%.*]] = fdiv float [[TMP19]], [[TMP17]] +// AVRFP32-NEXT: [[TMP21:%.*]] = fmul float [[A_IMAG]], [[TMP15]] +// AVRFP32-NEXT: [[TMP22:%.*]] = fsub float [[TMP21]], [[A_REAL]] +// AVRFP32-NEXT: [[TMP23:%.*]] = fdiv float [[TMP22]], [[TMP17]] +// AVRFP32-NEXT: br label [[COMPLEX_DIV]] +// AVRFP32: complex_div: +// AVRFP32-NEXT: [[TMP24:%.*]] = phi float [ [[TMP11]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP20]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP32-NEXT: [[TMP25:%.*]] = phi float [ [[TMP14]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP23]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP32-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// AVRFP32-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[TMP24]], ptr [[RETVAL_REALP]], align 1 +// AVRFP32-NEXT: store 
float [[TMP25]], ptr [[RETVAL_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP26:%.*]] = load { float, float }, ptr [[RETVAL]], align 1 +// AVRFP32-NEXT: ret { float, float } [[TMP26]] +// +// AVRFP64-LABEL: define dso_local void @divd( +// AVRFP64-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 1 [[AGG_RESULT:%.*]], double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP64-NEXT: entry: +// AVRFP64-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// AVRFP64-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// AVRFP64-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// AVRFP64-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// AVRFP64-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// AVRFP64-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// AVRFP64-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// AVRFP64-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// AVRFP64-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// AVRFP64-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: [[B_IMAG:%.*]] = load 
double, ptr [[B_IMAGP]], align 8 +// AVRFP64-NEXT: [[TMP4:%.*]] = call addrspace(1) double @llvm.fabs.f64(double [[B_REAL]]) +// AVRFP64-NEXT: [[TMP5:%.*]] = call addrspace(1) double @llvm.fabs.f64(double [[B_IMAG]]) +// AVRFP64-NEXT: [[ABS_CMP:%.*]] = fcmp ugt double [[TMP4]], [[TMP5]] +// AVRFP64-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// AVRFP64: abs_rhsr_greater_or_equal_abs_rhsi: +// AVRFP64-NEXT: [[TMP6:%.*]] = fdiv double [[B_IMAG]], [[B_REAL]] +// AVRFP64-NEXT: [[TMP7:%.*]] = fmul double [[TMP6]], [[B_IMAG]] +// AVRFP64-NEXT: [[TMP8:%.*]] = fadd double [[B_REAL]], [[TMP7]] +// AVRFP64-NEXT: [[TMP9:%.*]] = fmul double [[A_IMAG]], [[TMP6]] +// AVRFP64-NEXT: [[TMP10:%.*]] = fadd double [[A_REAL]], [[TMP9]] +// AVRFP64-NEXT: [[TMP11:%.*]] = fdiv double [[TMP10]], [[TMP8]] +// AVRFP64-NEXT: [[TMP12:%.*]] = fmul double [[A_REAL]], [[TMP6]] +// AVRFP64-NEXT: [[TMP13:%.*]] = fsub double [[A_IMAG]], [[TMP12]] +// AVRFP64-NEXT: [[TMP14:%.*]] = fdiv double [[TMP13]], [[TMP8]] +// AVRFP64-NEXT: br label [[COMPLEX_DIV:%.*]] +// AVRFP64: abs_rhsr_less_than_abs_rhsi: +// AVRFP64-NEXT: [[TMP15:%.*]] = fdiv double [[B_REAL]], [[B_IMAG]] +// AVRFP64-NEXT: [[TMP16:%.*]] = fmul double [[TMP15]], [[B_REAL]] +// AVRFP64-NEXT: [[TMP17:%.*]] = fadd double [[B_IMAG]], [[TMP16]] +// AVRFP64-NEXT: [[TMP18:%.*]] = fmul double [[A_REAL]], [[TMP15]] +// AVRFP64-NEXT: [[TMP19:%.*]] = fadd double [[TMP18]], [[A_IMAG]] +// AVRFP64-NEXT: [[TMP20:%.*]] = fdiv double [[TMP19]], [[TMP17]] +// AVRFP64-NEXT: [[TMP21:%.*]] = fmul double [[A_IMAG]], [[TMP15]] +// AVRFP64-NEXT: [[TMP22:%.*]] = fsub double [[TMP21]], [[A_REAL]] +// AVRFP64-NEXT: [[TMP23:%.*]] = fdiv double [[TMP22]], [[TMP17]] +// AVRFP64-NEXT: br label [[COMPLEX_DIV]] +// AVRFP64: complex_div: +// AVRFP64-NEXT: [[TMP24:%.*]] = phi double [ [[TMP11]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP20]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP64-NEXT: 
[[TMP25:%.*]] = phi double [ [[TMP14]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP23]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP64-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[TMP24]], ptr [[AGG_RESULT_REALP]], align 1 +// AVRFP64-NEXT: store double [[TMP25]], ptr [[AGG_RESULT_IMAGP]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 1 +// AVRFP64-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 1 +// AVRFP64-NEXT: ret void +// +// BASIC_FAST-LABEL: define dso_local { double, double } @divd( +// BASIC_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1:[0-9]+]] { +// BASIC_FAST-NEXT: entry: +// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// BASIC_FAST-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// BASIC_FAST-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// BASIC_FAST-NEXT: [[TMP0:%.*]] = 
getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// BASIC_FAST-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// BASIC_FAST-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// BASIC_FAST-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// BASIC_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// BASIC_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// BASIC_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// BASIC_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// BASIC_FAST-NEXT: [[TMP4:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[B_REAL]] +// BASIC_FAST-NEXT: [[TMP5:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[TMP6:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[TMP4]], [[TMP5]] +// BASIC_FAST-NEXT: [[TMP7:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[B_REAL]], [[B_REAL]] +// BASIC_FAST-NEXT: [[TMP8:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[B_IMAG]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[TMP9:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[TMP7]], [[TMP8]] +// 
BASIC_FAST-NEXT: [[TMP10:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[B_REAL]] +// BASIC_FAST-NEXT: [[TMP11:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[TMP12:%.*]] = fsub reassoc nnan ninf nsz arcp afn double [[TMP10]], [[TMP11]] +// BASIC_FAST-NEXT: [[TMP13:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[TMP6]], [[TMP9]] +// BASIC_FAST-NEXT: [[TMP14:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[TMP12]], [[TMP9]] +// BASIC_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC_FAST-NEXT: store double [[TMP13]], ptr [[RETVAL_REALP]], align 8 +// BASIC_FAST-NEXT: store double [[TMP14]], ptr [[RETVAL_IMAGP]], align 8 +// BASIC_FAST-NEXT: [[TMP15:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// BASIC_FAST-NEXT: ret { double, double } [[TMP15]] +// +// FULL_FAST-LABEL: define dso_local { double, double } @divd( +// FULL_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1:[0-9]+]] { +// FULL_FAST-NEXT: entry: +// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// FULL_FAST-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// FULL_FAST-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// FULL_FAST-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// FULL_FAST-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// FULL_FAST-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// FULL_FAST-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, 
ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// FULL_FAST-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// FULL_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// FULL_FAST-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// FULL_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// FULL_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// FULL_FAST-NEXT: [[CALL:%.*]] = call { double, double } @__divdc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] +// FULL_FAST-NEXT: [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0 +// FULL_FAST-NEXT: [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 1 +// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// FULL_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// FULL_FAST-NEXT: store double [[TMP4]], ptr [[RETVAL_REALP]], align 8 +// FULL_FAST-NEXT: store double [[TMP5]], ptr [[RETVAL_IMAGP]], align 8 +// FULL_FAST-NEXT: [[TMP6:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// FULL_FAST-NEXT: ret { double, double } [[TMP6]] +// +// IMPRVD_FAST-LABEL: define dso_local { double, double } @divd( +// 
IMPRVD_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR2:[0-9]+]] { +// IMPRVD_FAST-NEXT: entry: +// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// IMPRVD_FAST-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// IMPRVD_FAST-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// IMPRVD_FAST-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// IMPRVD_FAST-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// IMPRVD_FAST-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// IMPRVD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// IMPRVD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// IMPRVD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// IMPRVD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// IMPRVD_FAST-NEXT: [[TMP4:%.*]] = call reassoc nnan ninf nsz arcp 
afn double @llvm.fabs.f64(double [[B_REAL]]) +// IMPRVD_FAST-NEXT: [[TMP5:%.*]] = call reassoc nnan ninf nsz arcp afn double @llvm.fabs.f64(double [[B_IMAG]]) +// IMPRVD_FAST-NEXT: [[ABS_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt double [[TMP4]], [[TMP5]] +// IMPRVD_FAST-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD_FAST: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP6:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[B_IMAG]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[TMP7:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[TMP6]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP8:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[B_REAL]], [[TMP7]] +// IMPRVD_FAST-NEXT: [[TMP9:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[TMP6]] +// IMPRVD_FAST-NEXT: [[TMP10:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[TMP9]] +// IMPRVD_FAST-NEXT: [[TMP11:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[TMP10]], [[TMP8]] +// IMPRVD_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[TMP6]] +// IMPRVD_FAST-NEXT: [[TMP13:%.*]] = fsub reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[TMP12]] +// IMPRVD_FAST-NEXT: [[TMP14:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[TMP13]], [[TMP8]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD_FAST: abs_rhsr_less_than_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP15:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[B_REAL]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP16:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[TMP15]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[TMP17:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[B_IMAG]], [[TMP16]] +// IMPRVD_FAST-NEXT: [[TMP18:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[TMP15]] +// IMPRVD_FAST-NEXT: [[TMP19:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[TMP18]], [[A_IMAG]] +// 
IMPRVD_FAST-NEXT: [[TMP20:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[TMP19]], [[TMP17]] +// IMPRVD_FAST-NEXT: [[TMP21:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[TMP15]] +// IMPRVD_FAST-NEXT: [[TMP22:%.*]] = fsub reassoc nnan ninf nsz arcp afn double [[TMP21]], [[A_REAL]] +// IMPRVD_FAST-NEXT: [[TMP23:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[TMP22]], [[TMP17]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD_FAST: complex_div: +// IMPRVD_FAST-NEXT: [[TMP24:%.*]] = phi reassoc nnan ninf nsz arcp afn double [ [[TMP11]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP20]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[TMP25:%.*]] = phi reassoc nnan ninf nsz arcp afn double [ [[TMP14]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP23]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store double [[TMP24]], ptr [[RETVAL_REALP]], align 8 +// IMPRVD_FAST-NEXT: store double [[TMP25]], ptr [[RETVAL_IMAGP]], align 8 +// IMPRVD_FAST-NEXT: [[TMP26:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// IMPRVD_FAST-NEXT: ret { double, double } [[TMP26]] +// +// PRMTD_FAST-LABEL: define dso_local { double, double } @divd( +// PRMTD_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1:[0-9]+]] { +// PRMTD_FAST-NEXT: entry: +// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// PRMTD_FAST-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// PRMTD_FAST-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// PRMTD_FAST-NEXT: [[TMP0:%.*]] = getelementptr inbounds { 
double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// PRMTD_FAST-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// PRMTD_FAST-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// PRMTD_FAST-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// PRMTD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// PRMTD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// PRMTD_FAST-NEXT: [[EXT:%.*]] = fpext double [[A_REAL]] to x86_fp80 +// PRMTD_FAST-NEXT: [[EXT1:%.*]] = fpext double [[A_IMAG]] to x86_fp80 +// PRMTD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// PRMTD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// PRMTD_FAST-NEXT: [[EXT2:%.*]] = fpext double [[B_REAL]] to x86_fp80 +// PRMTD_FAST-NEXT: [[EXT3:%.*]] = fpext double [[B_IMAG]] to x86_fp80 +// PRMTD_FAST-NEXT: [[TMP4:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[EXT]], [[EXT2]] +// PRMTD_FAST-NEXT: [[TMP5:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[EXT1]], [[EXT3]] +// PRMTD_FAST-NEXT: [[TMP6:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP4]], [[TMP5]] +// PRMTD_FAST-NEXT: [[TMP7:%.*]] = fmul reassoc 
nnan ninf nsz arcp afn x86_fp80 [[EXT2]], [[EXT2]] +// PRMTD_FAST-NEXT: [[TMP8:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[EXT3]], [[EXT3]] +// PRMTD_FAST-NEXT: [[TMP9:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP7]], [[TMP8]] +// PRMTD_FAST-NEXT: [[TMP10:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[EXT1]], [[EXT2]] +// PRMTD_FAST-NEXT: [[TMP11:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[EXT]], [[EXT3]] +// PRMTD_FAST-NEXT: [[TMP12:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP10]], [[TMP11]] +// PRMTD_FAST-NEXT: [[TMP13:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP6]], [[TMP9]] +// PRMTD_FAST-NEXT: [[TMP14:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP12]], [[TMP9]] +// PRMTD_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc x86_fp80 [[TMP13]] to double +// PRMTD_FAST-NEXT: [[UNPROMOTION4:%.*]] = fptrunc x86_fp80 [[TMP14]] to double +// PRMTD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store double [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 8 +// PRMTD_FAST-NEXT: store double [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 8 +// PRMTD_FAST-NEXT: [[TMP15:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// PRMTD_FAST-NEXT: ret { double, double } [[TMP15]] +// _Complex double divd(_Complex double a, _Complex double b) { - // LABEL: define {{.*}} @divd( - // FULL: call {{.*}} @__divdc3 - // FULL_FAST: call {{.*}} @__divdc3 - // - // BASIC: fmul{{.*}}double - // BASIC-NEXT: fmul{{.*}}double - // BASIC-NEXT: fadd{{.*}}double - // BASIC-NEXT: fmul{{.*}}double - // BASIC-NEXT: fmul{{.*}}double - // BASIC-NEXT: fadd{{.*}}double - // BASIC-NEXT: fmul{{.*}}double - // BASIC-NEXT: fmul{{.*}}double - // BASIC-NEXT: fsub{{.*}}double - // BASIC-NEXT: fdiv{{.*}}double - // BASIC-NEXT: fdiv{{.*}}double - 
// - // IMPRVD: call{{.*}}double @llvm.fabs.f64(double {{.*}}) - // IMPRVD-NEXT: call{{.*}}double @llvm.fabs.f64(double {{.*}}) - // IMPRVD-NEXT: fcmp{{.*}}ugt double {{.*}}, {{.*}} - // IMPRVD-NEXT: br i1 {{.*}}, label - // IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: - // IMPRVD-NEXT: fdiv{{.*}}double - // IMPRVD-NEXT: fmul{{.*}}double - // IMPRVD-NEXT: fadd{{.*}}double - // IMPRVD-NEXT: fmul{{.*}}double - // IMPRVD-NEXT: fadd{{.*}}double - // IMPRVD-NEXT: fdiv{{.*}}double - // IMPRVD-NEXT: fmul{{.*}}double - // IMPRVD-NEXT: fsub{{.*}}double - // IMPRVD-NEXT: fdiv{{.*}}double - // IMPRVD-NEXT: br label - // IMPRVD: abs_rhsr_less_than_abs_rhsi: - // IMPRVD-NEXT: fdiv{{.*}}double - // IMPRVD-NEXT: fmul{{.*}}double - // IMPRVD-NEXT: fadd{{.*}}double - // IMPRVD-NEXT: fmul{{.*}}double - // IMPRVD-NEXT: fadd{{.*}}double - // IMPRVD-NEXT: fdiv{{.*}}double - // IMPRVD-NEXT: fmul{{.*}}double - // IMPRVD-NEXT: fsub{{.*}}double - // IMPRVD-NEXT: fdiv{{.*}}double - // - // PRMTD: load double, ptr {{.*}} - // PRMTD: fpext double {{.*}} to x86_fp80 - // PRMTD-NEXT: fpext double {{.*}} to x86_fp80 - // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load double, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load double, ptr {{.*}} - // PRMTD-NEXT: fpext double {{.*}} to x86_fp80 - // PRMTD-NEXT: fpext double {{.*}} to x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fadd{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fadd{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fsub{{.*}}x86_fp80 - // PRMTD-NEXT: fdiv{{.*}}x86_fp80 - // PRMTD-NEXT: fdiv{{.*}}x86_fp80 - // PRMTD-NEXT: fptrunc x86_fp80 {{.*}} to double - // PRMTD-NEXT: fptrunc x86_fp80 {{.*}} to double - return a / b; } +// FULL-LABEL: define 
dso_local { double, double } @muld( +// FULL-SAME: double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) #[[ATTR1]] { +// FULL-NEXT: entry: +// FULL-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// FULL-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// FULL-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// FULL-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// FULL-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// FULL-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// FULL-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// FULL-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// FULL-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// FULL-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// FULL-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// FULL-NEXT: [[MUL_AC:%.*]] = fmul double [[A_REAL]], [[B_REAL]] +// FULL-NEXT: [[MUL_BD:%.*]] = fmul double [[A_IMAG]], [[B_IMAG]] +// FULL-NEXT: [[MUL_AD:%.*]] = fmul double [[A_REAL]], [[B_IMAG]] +// FULL-NEXT: [[MUL_BC:%.*]] = fmul double [[A_IMAG]], 
[[B_REAL]] +// FULL-NEXT: [[MUL_R:%.*]] = fsub double [[MUL_AC]], [[MUL_BD]] +// FULL-NEXT: [[MUL_I:%.*]] = fadd double [[MUL_AD]], [[MUL_BC]] +// FULL-NEXT: [[ISNAN_CMP:%.*]] = fcmp uno double [[MUL_R]], [[MUL_R]] +// FULL-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2]] +// FULL: complex_mul_imag_nan: +// FULL-NEXT: [[ISNAN_CMP1:%.*]] = fcmp uno double [[MUL_I]], [[MUL_I]] +// FULL-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// FULL: complex_mul_libcall: +// FULL-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef [[A_REAL]], double noundef [[A_IMAG]], double noundef [[B_REAL]], double noundef [[B_IMAG]]) #[[ATTR2]] +// FULL-NEXT: [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0 +// FULL-NEXT: [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 1 +// FULL-NEXT: br label [[COMPLEX_MUL_CONT]] +// FULL: complex_mul_cont: +// FULL-NEXT: [[REAL_MUL_PHI:%.*]] = phi double [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP4]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL-NEXT: [[IMAG_MUL_PHI:%.*]] = phi double [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP5]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// FULL-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// FULL-NEXT: store double [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 8 +// FULL-NEXT: store double [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 8 +// FULL-NEXT: [[TMP6:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// FULL-NEXT: ret { double, double } [[TMP6]] +// +// BASIC-LABEL: define dso_local { double, double } @muld( +// BASIC-SAME: double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef 
[[B_COERCE1:%.*]]) #[[ATTR1]] { +// BASIC-NEXT: entry: +// BASIC-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// BASIC-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// BASIC-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// BASIC-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// BASIC-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// BASIC-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// BASIC-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// BASIC-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// BASIC-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// BASIC-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// BASIC-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// BASIC-NEXT: [[MUL_AC:%.*]] = fmul double [[A_REAL]], [[B_REAL]] +// BASIC-NEXT: [[MUL_BD:%.*]] = fmul double [[A_IMAG]], [[B_IMAG]] +// BASIC-NEXT: [[MUL_AD:%.*]] = fmul double [[A_REAL]], [[B_IMAG]] +// BASIC-NEXT: [[MUL_BC:%.*]] = fmul double [[A_IMAG]], [[B_REAL]] +// BASIC-NEXT: [[MUL_R:%.*]] = fsub double [[MUL_AC]], [[MUL_BD]] +// BASIC-NEXT: [[MUL_I:%.*]] = fadd double [[MUL_AD]], [[MUL_BC]] 
+// BASIC-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 +// BASIC-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 +// BASIC-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// BASIC-NEXT: ret { double, double } [[TMP4]] +// +// IMPRVD-LABEL: define dso_local { double, double } @muld( +// IMPRVD-SAME: double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) #[[ATTR2]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// IMPRVD-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// IMPRVD-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// IMPRVD-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// IMPRVD-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// IMPRVD-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// IMPRVD-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// IMPRVD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// IMPRVD-NEXT: [[B_REALP:%.*]] = 
getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// IMPRVD-NEXT: [[MUL_AC:%.*]] = fmul double [[A_REAL]], [[B_REAL]] +// IMPRVD-NEXT: [[MUL_BD:%.*]] = fmul double [[A_IMAG]], [[B_IMAG]] +// IMPRVD-NEXT: [[MUL_AD:%.*]] = fmul double [[A_REAL]], [[B_IMAG]] +// IMPRVD-NEXT: [[MUL_BC:%.*]] = fmul double [[A_IMAG]], [[B_REAL]] +// IMPRVD-NEXT: [[MUL_R:%.*]] = fsub double [[MUL_AC]], [[MUL_BD]] +// IMPRVD-NEXT: [[MUL_I:%.*]] = fadd double [[MUL_AD]], [[MUL_BC]] +// IMPRVD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 +// IMPRVD-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 +// IMPRVD-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// IMPRVD-NEXT: ret { double, double } [[TMP4]] +// +// PRMTD-LABEL: define dso_local { double, double } @muld( +// PRMTD-SAME: double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) #[[ATTR1]] { +// PRMTD-NEXT: entry: +// PRMTD-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// PRMTD-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// PRMTD-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// PRMTD-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// PRMTD-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// 
PRMTD-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// PRMTD-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// PRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// PRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// PRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// PRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// PRMTD-NEXT: [[MUL_AC:%.*]] = fmul double [[A_REAL]], [[B_REAL]] +// PRMTD-NEXT: [[MUL_BD:%.*]] = fmul double [[A_IMAG]], [[B_IMAG]] +// PRMTD-NEXT: [[MUL_AD:%.*]] = fmul double [[A_REAL]], [[B_IMAG]] +// PRMTD-NEXT: [[MUL_BC:%.*]] = fmul double [[A_IMAG]], [[B_REAL]] +// PRMTD-NEXT: [[MUL_R:%.*]] = fsub double [[MUL_AC]], [[MUL_BD]] +// PRMTD-NEXT: [[MUL_I:%.*]] = fadd double [[MUL_AD]], [[MUL_BC]] +// PRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 +// PRMTD-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 +// PRMTD-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// PRMTD-NEXT: ret { double, double } [[TMP4]] +// +// X86WINPRMTD-LABEL: define dso_local void @muld( +// X86WINPRMTD-SAME: ptr 
dead_on_unwind noalias writable sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] { +// X86WINPRMTD-NEXT: entry: +// X86WINPRMTD-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: [[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: [[A_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// X86WINPRMTD-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD-NEXT: store ptr [[A]], ptr [[A_INDIRECT_ADDR]], align 8 +// X86WINPRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// X86WINPRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[MUL_AC:%.*]] = fmul double [[A_REAL]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[MUL_BD:%.*]] = fmul double [[A_IMAG]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[MUL_AD:%.*]] = fmul double [[A_REAL]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[MUL_BC:%.*]] = fmul double [[A_IMAG]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[MUL_R:%.*]] = fsub double [[MUL_AC]], [[MUL_BD]] +// X86WINPRMTD-NEXT: [[MUL_I:%.*]] = fadd double [[MUL_AD]], [[MUL_BC]] +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr 
[[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store double [[MUL_R]], ptr [[AGG_RESULT_REALP]], align 8 +// X86WINPRMTD-NEXT: store double [[MUL_I]], ptr [[AGG_RESULT_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 8 +// X86WINPRMTD-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 8 +// X86WINPRMTD-NEXT: ret void +// +// AVRFP32-LABEL: define dso_local { float, float } @muld( +// AVRFP32-SAME: float noundef [[A_COERCE0:%.*]], float noundef [[A_COERCE1:%.*]], float noundef [[B_COERCE0:%.*]], float noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP32-NEXT: entry: +// AVRFP32-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// AVRFP32-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// AVRFP32-NEXT: [[TMP0:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[A_COERCE0]], ptr [[TMP0]], align 4 +// AVRFP32-NEXT: [[TMP1:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[A_COERCE1]], ptr [[TMP1]], align 4 +// AVRFP32-NEXT: [[TMP2:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// 
AVRFP32-NEXT: store float [[B_COERCE0]], ptr [[TMP2]], align 4 +// AVRFP32-NEXT: [[TMP3:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[B_COERCE1]], ptr [[TMP3]], align 4 +// AVRFP32-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// AVRFP32-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// AVRFP32-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// AVRFP32-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// AVRFP32-NEXT: [[MUL_AC:%.*]] = fmul float [[A_REAL]], [[B_REAL]] +// AVRFP32-NEXT: [[MUL_BD:%.*]] = fmul float [[A_IMAG]], [[B_IMAG]] +// AVRFP32-NEXT: [[MUL_AD:%.*]] = fmul float [[A_REAL]], [[B_IMAG]] +// AVRFP32-NEXT: [[MUL_BC:%.*]] = fmul float [[A_IMAG]], [[B_REAL]] +// AVRFP32-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// AVRFP32-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// AVRFP32-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// AVRFP32-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 1 +// AVRFP32-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP4:%.*]] = load { float, float }, ptr [[RETVAL]], align 1 +// AVRFP32-NEXT: ret { float, float } [[TMP4]] +// +// AVRFP64-LABEL: define dso_local void @muld( +// AVRFP64-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 1 [[AGG_RESULT:%.*]], double noundef 
[[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP64-NEXT: entry: +// AVRFP64-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// AVRFP64-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// AVRFP64-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// AVRFP64-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// AVRFP64-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// AVRFP64-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// AVRFP64-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// AVRFP64-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// AVRFP64-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// AVRFP64-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// AVRFP64-NEXT: [[MUL_AC:%.*]] = fmul double [[A_REAL]], [[B_REAL]] +// AVRFP64-NEXT: [[MUL_BD:%.*]] = fmul double [[A_IMAG]], [[B_IMAG]] +// AVRFP64-NEXT: [[MUL_AD:%.*]] = fmul double [[A_REAL]], [[B_IMAG]] +// AVRFP64-NEXT: [[MUL_BC:%.*]] = fmul double [[A_IMAG]], [[B_REAL]] +// AVRFP64-NEXT: [[MUL_R:%.*]] = fsub 
double [[MUL_AC]], [[MUL_BD]] +// AVRFP64-NEXT: [[MUL_I:%.*]] = fadd double [[MUL_AD]], [[MUL_BC]] +// AVRFP64-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[MUL_R]], ptr [[AGG_RESULT_REALP]], align 1 +// AVRFP64-NEXT: store double [[MUL_I]], ptr [[AGG_RESULT_IMAGP]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 1 +// AVRFP64-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 1 +// AVRFP64-NEXT: ret void +// +// BASIC_FAST-LABEL: define dso_local { double, double } @muld( +// BASIC_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1]] { +// BASIC_FAST-NEXT: entry: +// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// BASIC_FAST-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// BASIC_FAST-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// BASIC_FAST-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, 
ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// BASIC_FAST-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// BASIC_FAST-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// BASIC_FAST-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// BASIC_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// BASIC_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// BASIC_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// BASIC_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// BASIC_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[B_REAL]] +// BASIC_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[B_REAL]] +// BASIC_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn double [[MUL_AC]], [[MUL_BD]] +// BASIC_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[MUL_AD]], [[MUL_BC]] +// BASIC_FAST-NEXT: 
[[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC_FAST-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 +// BASIC_FAST-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 +// BASIC_FAST-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// BASIC_FAST-NEXT: ret { double, double } [[TMP4]] +// +// FULL_FAST-LABEL: define dso_local { double, double } @muld( +// FULL_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1]] { +// FULL_FAST-NEXT: entry: +// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// FULL_FAST-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// FULL_FAST-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// FULL_FAST-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// FULL_FAST-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// FULL_FAST-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// FULL_FAST-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// FULL_FAST-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// FULL_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// FULL_FAST-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// FULL_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr 
[[A]], i32 0, i32 1 +// FULL_FAST-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// FULL_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// FULL_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[B_REAL]] +// FULL_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[B_IMAG]] +// FULL_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[B_IMAG]] +// FULL_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[B_REAL]] +// FULL_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn double [[MUL_AC]], [[MUL_BD]] +// FULL_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[MUL_AD]], [[MUL_BC]] +// FULL_FAST-NEXT: [[ISNAN_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno double [[MUL_R]], [[MUL_R]] +// FULL_FAST-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2]] +// FULL_FAST: complex_mul_imag_nan: +// FULL_FAST-NEXT: [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno double [[MUL_I]], [[MUL_I]] +// FULL_FAST-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// FULL_FAST: complex_mul_libcall: +// FULL_FAST-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] +// FULL_FAST-NEXT: [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0 +// FULL_FAST-NEXT: [[TMP5:%.*]] = extractvalue { 
double, double } [[CALL]], 1 +// FULL_FAST-NEXT: br label [[COMPLEX_MUL_CONT]] +// FULL_FAST: complex_mul_cont: +// FULL_FAST-NEXT: [[REAL_MUL_PHI:%.*]] = phi reassoc nnan ninf nsz arcp afn double [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP4]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL_FAST-NEXT: [[IMAG_MUL_PHI:%.*]] = phi reassoc nnan ninf nsz arcp afn double [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP5]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// FULL_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// FULL_FAST-NEXT: store double [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 8 +// FULL_FAST-NEXT: store double [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 8 +// FULL_FAST-NEXT: [[TMP6:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// FULL_FAST-NEXT: ret { double, double } [[TMP6]] +// +// IMPRVD_FAST-LABEL: define dso_local { double, double } @muld( +// IMPRVD_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR2]] { +// IMPRVD_FAST-NEXT: entry: +// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// IMPRVD_FAST-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// IMPRVD_FAST-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// IMPRVD_FAST-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// IMPRVD_FAST-NEXT: [[TMP2:%.*]] = getelementptr inbounds { 
double, double }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// IMPRVD_FAST-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// IMPRVD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// IMPRVD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// IMPRVD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// IMPRVD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// IMPRVD_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn double [[MUL_AC]], [[MUL_BD]] +// IMPRVD_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[MUL_AD]], [[MUL_BC]] +// IMPRVD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 +// IMPRVD_FAST-NEXT: store double 
[[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 +// IMPRVD_FAST-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// IMPRVD_FAST-NEXT: ret { double, double } [[TMP4]] +// +// PRMTD_FAST-LABEL: define dso_local { double, double } @muld( +// PRMTD_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1]] { +// PRMTD_FAST-NEXT: entry: +// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// PRMTD_FAST-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// PRMTD_FAST-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// PRMTD_FAST-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// PRMTD_FAST-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// PRMTD_FAST-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// PRMTD_FAST-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// PRMTD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// PRMTD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// PRMTD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// PRMTD_FAST-NEXT: 
[[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// PRMTD_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[B_REAL]] +// PRMTD_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[B_IMAG]] +// PRMTD_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_REAL]], [[B_IMAG]] +// PRMTD_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[A_IMAG]], [[B_REAL]] +// PRMTD_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn double [[MUL_AC]], [[MUL_BD]] +// PRMTD_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[MUL_AD]], [[MUL_BC]] +// PRMTD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 +// PRMTD_FAST-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 +// PRMTD_FAST-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// PRMTD_FAST-NEXT: ret { double, double } [[TMP4]] +// _Complex double muld(_Complex double a, _Complex double b) { - // LABEL: define {{.*}} @muld( - // FULL: call {{.*}} @__muldc3 - // - // FULL_FAST: alloca { double, double } - // FULL_FAST-NEXT: alloca { double, double } - // FULL_FAST-NEXT: alloca { double, double } - // FULL_FAST: load double, ptr {{.*}} - // FULL_FAST-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 - // FULL_FAST-NEXT: load double, ptr {{.*}} - // FULL_FAST-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 - // FULL_FAST-NEXT: load double, ptr {{.*}} - // FULL_FAST-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 - // FULL_FAST-NEXT: load 
double - // FULL_FAST-NEXT: fmul{{.*}}double - // FULL_FAST-NEXT: fmul{{.*}}double - // FULL_FAST-NEXT: fmul{{.*}}double - // FULL_FAST-NEXT: fmul{{.*}}double - // FULL_FAST-NEXT: fsub{{.*}}double - // FULL_FAST-NEXT: fadd{{.*}}double - - // BASIC: alloca { double, double } - // BASIC-NEXT: alloca { double, double } - // BASIC-NEXT: alloca { double, double } - // BASIC: load double, ptr {{.*}} - // BASIC-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 - // BASIC-NEXT: load double, ptr {{.*}} - // BASIC-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 - // BASIC-NEXT: load double, ptr {{.*}} - // BASIC-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 - // BASIC-NEXT: load double - // BASIC-NEXT: fmul{{.*}}double - // BASIC-NEXT: fmul{{.*}}double - // BASIC-NEXT: fmul{{.*}}double - // BASIC-NEXT: fmul{{.*}}double - // BASIC-NEXT: fsub{{.*}}double - // BASIC-NEXT: fadd{{.*}}double - // - // IMPRVD: alloca { double, double } - // IMPRVD-NEXT: alloca { double, double } - // IMPRVD-NEXT: alloca { double, double } - // IMPRVD: load double, ptr {{.*}} - // IMPRVD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 - // IMPRVD-NEXT: load double, ptr {{.*}} - // IMPRVD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 - // IMPRVD-NEXT: load double, ptr {{.*}} - // IMPRVD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 - // IMPRVD-NEXT: load double - // IMPRVD-NEXT: fmul{{.*}}double - // IMPRVD-NEXT: fmul{{.*}}double - // IMPRVD-NEXT: fmul{{.*}}double - // IMPRVD-NEXT: fmul{{.*}}double - // IMPRVD-NEXT: fsub{{.*}}double - // IMPRVD-NEXT: fadd{{.*}}double - // - // PRMTD: alloca { double, double } - // PRMTD-NEXT: alloca { double, double } - // PRMTD-NEXT: alloca { double, double } - // PRMTD: load double, ptr - // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load double, ptr 
{{.*}} - // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load double, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { double, double }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load{{.*}}double - // PRMTD-NEXT: fmul{{.*}}double - // PRMTD-NEXT: fmul{{.*}}double - // PRMTD-NEXT: fmul{{.*}}double - // PRMTD-NEXT: fmul{{.*}}double - // PRMTD-NEXT: fsub{{.*}}double - // PRMTD-NEXT: fadd{{.*}}double - - return a * b; -} - -_Complex _Float16 divf16(_Complex _Float16 a, _Complex _Float16 b) { - // LABEL: define {{.*}} @divf16( - - // FULL: call {{.*}} @__divsc3 - // FULL_FAST: call {{.*}} @__divsc3 - // - // BASIC: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fadd{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fadd{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fsub{{.*}}float - // BASIC-NEXT: fdiv{{.*}}float - // BASIC-NEXT: fdiv{{.*}}float - // - // IMPRVD: call{{.*}}float @llvm.fabs.f32(float {{.*}}) - // IMPRVD-NEXT: call{{.*}}float @llvm.fabs.f32(float {{.*}}) - // IMPRVD-NEXT: fcmp{{.*}}ugt float {{.*}}, {{.*}} - // IMPRVD-NEXT: br i1 {{.*}}, label - // IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: - // IMPRVD-NEXT: fdiv{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fadd{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fadd{{.*}}float - // IMPRVD-NEXT: fdiv{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fsub{{.*}}float - // IMPRVD-NEXT: fdiv{{.*}}float - // IMPRVD-NEXT: br label - // IMPRVD: abs_rhsr_less_than_abs_rhsi: - // IMPRVD-NEXT: fdiv{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fadd{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fadd{{.*}}float - // IMPRVD-NEXT: fdiv{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fsub{{.*}}float - // IMPRVD-NEXT: fdiv{{.*}}float - // - // PRMTD: load half, ptr 
{{.*}} - // PRMTD: fpext half {{.*}} to float - // PRMTD-NEXT: fpext half {{.*}} to float - // PRMTD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load half, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load half, ptr {{.*}} - // PRMTD-NEXT: fpext half {{.*}} to float - // PRMTD-NEXT: fpext half {{.*}} to float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fadd{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fadd{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fsub{{.*}}float - // PRMTD-NEXT: fdiv{{.*}}float - // PRMTD-NEXT: fdiv{{.*}}float - // PRMTD-NEXT: fptrunc float {{.*}} to half - // PRMTD-NEXT: fptrunc float {{.*}} to half - - return a / b; -} - -_Complex _Float16 mulf16(_Complex _Float16 a, _Complex _Float16 b) { - // LABEL: define {{.*}} @mulf16( - // FULL: call {{.*}} @__mulsc3 - // - // FULL_FAST: call {{.*}} @__mulsc3 - // - // BASIC: alloca { half, half } - // BASIC-NEXT: alloca { half, half } - // BASIC: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 - // BASIC-NEXT: load half, ptr {{.*}} - // BASIC-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 - // BASIC-NEXT: load half, ptr {{.*}} - // BASIC-NEXT: fpext half {{.*}} to float - // BASIC-NEXT: fpext half {{.*}} to float - // BASIC-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 - // BASIC-NEXT: load half, ptr {{.*}} - // BASIC-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 - // BASIC-NEXT: load half - // BASIC-NEXT: fpext half {{.*}} to float - // BASIC-NEXT: fpext half {{.*}} to float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fmul{{.*}}float - // BASIC-NEXT: fsub{{.*}}float - // BASIC-NEXT: fadd{{.*}}float - // 
BASIC-NEXT: fptrunc float {{.*}} to half - // BASIC-NEXT: fptrunc float {{.*}} to half - // - // IMPRVD: alloca { half, half } - // IMPRVD-NEXT: alloca { half, half } - // IMPRVD-NEXT: alloca { half, half } - // IMPRVD: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 - // IMPRVD-NEXT: load half, ptr {{.*}} - // IMPRVD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 - // IMPRVD-NEXT: load half, ptr {{.*}} - // IMPRVD-NEXT: fpext half {{.*}} to float - // IMPRVD-NEXT: fpext half {{.*}} to float - // IMPRVD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 - // IMPRVD-NEXT: load half, ptr {{.*}} - // IMPRVD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 - // IMPRVD-NEXT: load half - // IMPRVD-NEXT: fpext half {{.*}} to float - // IMPRVD-NEXT: fpext half {{.*}} to float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fmul{{.*}}float - // IMPRVD-NEXT: fsub{{.*}}float - // IMPRVD-NEXT: fadd{{.*}}float - // IMPRVD-NEXT: fptrunc float {{.*}} to half - // IMPRVD-NEXT: fptrunc float {{.*}} to half - - // PRMTD: alloca { half, half } - // PRMTD-NEXT: alloca { half, half } - // PRMTD-NEXT: alloca { half, half } - // PRMTD: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load half, ptr - // PRMTD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load half, ptr {{.*}} - // PRMTD-NEXT: fpext half {{.*}} to float - // PRMTD-NEXT: fpext half {{.*}} to float - // PRMTD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load half, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { half, half }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load{{.*}}half - // PRMTD-NEXT: fpext half {{.*}} to float - // PRMTD-NEXT: fpext half {{.*}} to float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: 
fmul{{.*}}float - // PRMTD-NEXT: fmul{{.*}}float - // PRMTD-NEXT: fsub{{.*}}float - // PRMTD-NEXT: fadd{{.*}}float - // PRMTD-NEXT: fptrunc float {{.*}} to half - // PRMTD-NEXT: fptrunc float {{.*}} to half - return a * b; } +// FULL-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld( +// FULL-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// FULL-NEXT: entry: +// FULL-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// FULL-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// FULL-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// FULL-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// FULL-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// FULL-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef [[A_REAL]], x86_fp80 noundef [[A_IMAG]], x86_fp80 noundef [[B_REAL]], x86_fp80 noundef [[B_IMAG]]) #[[ATTR2]] +// FULL-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0 +// FULL-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 +// FULL-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// FULL-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// FULL-NEXT: store x86_fp80 [[TMP0]], ptr [[RETVAL_REALP]], align 16 +// FULL-NEXT: store x86_fp80 [[TMP1]], ptr [[RETVAL_IMAGP]], align 16 +// FULL-NEXT: [[TMP2:%.*]] = load 
{ x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// FULL-NEXT: ret { x86_fp80, x86_fp80 } [[TMP2]] +// +// BASIC-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld( +// BASIC-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// BASIC-NEXT: entry: +// BASIC-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// BASIC-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// BASIC-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// BASIC-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// BASIC-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// BASIC-NEXT: [[TMP0:%.*]] = fmul x86_fp80 [[A_REAL]], [[B_REAL]] +// BASIC-NEXT: [[TMP1:%.*]] = fmul x86_fp80 [[A_IMAG]], [[B_IMAG]] +// BASIC-NEXT: [[TMP2:%.*]] = fadd x86_fp80 [[TMP0]], [[TMP1]] +// BASIC-NEXT: [[TMP3:%.*]] = fmul x86_fp80 [[B_REAL]], [[B_REAL]] +// BASIC-NEXT: [[TMP4:%.*]] = fmul x86_fp80 [[B_IMAG]], [[B_IMAG]] +// BASIC-NEXT: [[TMP5:%.*]] = fadd x86_fp80 [[TMP3]], [[TMP4]] +// BASIC-NEXT: [[TMP6:%.*]] = fmul x86_fp80 [[A_IMAG]], [[B_REAL]] +// BASIC-NEXT: [[TMP7:%.*]] = fmul x86_fp80 [[A_REAL]], [[B_IMAG]] +// BASIC-NEXT: [[TMP8:%.*]] = fsub x86_fp80 [[TMP6]], [[TMP7]] +// BASIC-NEXT: [[TMP9:%.*]] = fdiv x86_fp80 [[TMP2]], [[TMP5]] +// BASIC-NEXT: [[TMP10:%.*]] = fdiv x86_fp80 [[TMP8]], [[TMP5]] +// BASIC-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC-NEXT: 
[[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC-NEXT: store x86_fp80 [[TMP9]], ptr [[RETVAL_REALP]], align 16 +// BASIC-NEXT: store x86_fp80 [[TMP10]], ptr [[RETVAL_IMAGP]], align 16 +// BASIC-NEXT: [[TMP11:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// BASIC-NEXT: ret { x86_fp80, x86_fp80 } [[TMP11]] +// +// IMPRVD-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld( +// IMPRVD-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR2]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// IMPRVD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// IMPRVD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// IMPRVD-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) +// IMPRVD-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) +// IMPRVD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt x86_fp80 [[TMP0]], [[TMP1]] +// IMPRVD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD-NEXT: [[TMP2:%.*]] = fdiv x86_fp80 [[B_IMAG]], [[B_REAL]] +// IMPRVD-NEXT: [[TMP3:%.*]] = fmul x86_fp80 [[TMP2]], [[B_IMAG]] +// IMPRVD-NEXT: 
[[TMP4:%.*]] = fadd x86_fp80 [[B_REAL]], [[TMP3]] +// IMPRVD-NEXT: [[TMP5:%.*]] = fmul x86_fp80 [[A_IMAG]], [[TMP2]] +// IMPRVD-NEXT: [[TMP6:%.*]] = fadd x86_fp80 [[A_REAL]], [[TMP5]] +// IMPRVD-NEXT: [[TMP7:%.*]] = fdiv x86_fp80 [[TMP6]], [[TMP4]] +// IMPRVD-NEXT: [[TMP8:%.*]] = fmul x86_fp80 [[A_REAL]], [[TMP2]] +// IMPRVD-NEXT: [[TMP9:%.*]] = fsub x86_fp80 [[A_IMAG]], [[TMP8]] +// IMPRVD-NEXT: [[TMP10:%.*]] = fdiv x86_fp80 [[TMP9]], [[TMP4]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD: abs_rhsr_less_than_abs_rhsi: +// IMPRVD-NEXT: [[TMP11:%.*]] = fdiv x86_fp80 [[B_REAL]], [[B_IMAG]] +// IMPRVD-NEXT: [[TMP12:%.*]] = fmul x86_fp80 [[TMP11]], [[B_REAL]] +// IMPRVD-NEXT: [[TMP13:%.*]] = fadd x86_fp80 [[B_IMAG]], [[TMP12]] +// IMPRVD-NEXT: [[TMP14:%.*]] = fmul x86_fp80 [[A_REAL]], [[TMP11]] +// IMPRVD-NEXT: [[TMP15:%.*]] = fadd x86_fp80 [[TMP14]], [[A_IMAG]] +// IMPRVD-NEXT: [[TMP16:%.*]] = fdiv x86_fp80 [[TMP15]], [[TMP13]] +// IMPRVD-NEXT: [[TMP17:%.*]] = fmul x86_fp80 [[A_IMAG]], [[TMP11]] +// IMPRVD-NEXT: [[TMP18:%.*]] = fsub x86_fp80 [[TMP17]], [[A_REAL]] +// IMPRVD-NEXT: [[TMP19:%.*]] = fdiv x86_fp80 [[TMP18]], [[TMP13]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD: complex_div: +// IMPRVD-NEXT: [[TMP20:%.*]] = phi x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[TMP21:%.*]] = phi x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD-NEXT: store x86_fp80 [[TMP20]], ptr [[RETVAL_REALP]], align 16 +// IMPRVD-NEXT: store x86_fp80 [[TMP21]], ptr [[RETVAL_IMAGP]], align 16 +// IMPRVD-NEXT: [[TMP22:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// 
IMPRVD-NEXT: ret { x86_fp80, x86_fp80 } [[TMP22]] +// +// PRMTD-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld( +// PRMTD-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// PRMTD-NEXT: entry: +// PRMTD-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// PRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// PRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// PRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) +// PRMTD-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) +// PRMTD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt x86_fp80 [[TMP0]], [[TMP1]] +// PRMTD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// PRMTD: abs_rhsr_greater_or_equal_abs_rhsi: +// PRMTD-NEXT: [[TMP2:%.*]] = fdiv x86_fp80 [[B_IMAG]], [[B_REAL]] +// PRMTD-NEXT: [[TMP3:%.*]] = fmul x86_fp80 [[TMP2]], [[B_IMAG]] +// PRMTD-NEXT: [[TMP4:%.*]] = fadd x86_fp80 [[B_REAL]], [[TMP3]] +// PRMTD-NEXT: [[TMP5:%.*]] = fmul x86_fp80 [[A_IMAG]], [[TMP2]] +// PRMTD-NEXT: [[TMP6:%.*]] = fadd x86_fp80 [[A_REAL]], [[TMP5]] +// PRMTD-NEXT: [[TMP7:%.*]] = fdiv x86_fp80 [[TMP6]], [[TMP4]] +// PRMTD-NEXT: [[TMP8:%.*]] = fmul x86_fp80 [[A_REAL]], [[TMP2]] +// PRMTD-NEXT: [[TMP9:%.*]] = fsub x86_fp80 
[[A_IMAG]], [[TMP8]] +// PRMTD-NEXT: [[TMP10:%.*]] = fdiv x86_fp80 [[TMP9]], [[TMP4]] +// PRMTD-NEXT: br label [[COMPLEX_DIV:%.*]] +// PRMTD: abs_rhsr_less_than_abs_rhsi: +// PRMTD-NEXT: [[TMP11:%.*]] = fdiv x86_fp80 [[B_REAL]], [[B_IMAG]] +// PRMTD-NEXT: [[TMP12:%.*]] = fmul x86_fp80 [[TMP11]], [[B_REAL]] +// PRMTD-NEXT: [[TMP13:%.*]] = fadd x86_fp80 [[B_IMAG]], [[TMP12]] +// PRMTD-NEXT: [[TMP14:%.*]] = fmul x86_fp80 [[A_REAL]], [[TMP11]] +// PRMTD-NEXT: [[TMP15:%.*]] = fadd x86_fp80 [[TMP14]], [[A_IMAG]] +// PRMTD-NEXT: [[TMP16:%.*]] = fdiv x86_fp80 [[TMP15]], [[TMP13]] +// PRMTD-NEXT: [[TMP17:%.*]] = fmul x86_fp80 [[A_IMAG]], [[TMP11]] +// PRMTD-NEXT: [[TMP18:%.*]] = fsub x86_fp80 [[TMP17]], [[A_REAL]] +// PRMTD-NEXT: [[TMP19:%.*]] = fdiv x86_fp80 [[TMP18]], [[TMP13]] +// PRMTD-NEXT: br label [[COMPLEX_DIV]] +// PRMTD: complex_div: +// PRMTD-NEXT: [[TMP20:%.*]] = phi x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD-NEXT: [[TMP21:%.*]] = phi x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD-NEXT: store x86_fp80 [[TMP20]], ptr [[RETVAL_REALP]], align 16 +// PRMTD-NEXT: store x86_fp80 [[TMP21]], ptr [[RETVAL_IMAGP]], align 16 +// PRMTD-NEXT: [[TMP22:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// PRMTD-NEXT: ret { x86_fp80, x86_fp80 } [[TMP22]] +// +// X86WINPRMTD-LABEL: define dso_local void @divld( +// X86WINPRMTD-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] { +// X86WINPRMTD-NEXT: entry: +// X86WINPRMTD-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: 
[[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: [[A_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// X86WINPRMTD-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD-NEXT: store ptr [[A]], ptr [[A_INDIRECT_ADDR]], align 8 +// X86WINPRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// X86WINPRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) +// X86WINPRMTD-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) +// X86WINPRMTD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt double [[TMP0]], [[TMP1]] +// X86WINPRMTD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// X86WINPRMTD: abs_rhsr_greater_or_equal_abs_rhsi: +// X86WINPRMTD-NEXT: [[TMP2:%.*]] = fdiv double [[B_IMAG]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[TMP3:%.*]] = fmul double [[TMP2]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[TMP4:%.*]] = fadd double [[B_REAL]], [[TMP3]] +// X86WINPRMTD-NEXT: [[TMP5:%.*]] = fmul double [[A_IMAG]], [[TMP2]] +// X86WINPRMTD-NEXT: [[TMP6:%.*]] = fadd double [[A_REAL]], [[TMP5]] +// X86WINPRMTD-NEXT: [[TMP7:%.*]] = fdiv double [[TMP6]], [[TMP4]] +// X86WINPRMTD-NEXT: [[TMP8:%.*]] = fmul double [[A_REAL]], [[TMP2]] +// 
X86WINPRMTD-NEXT: [[TMP9:%.*]] = fsub double [[A_IMAG]], [[TMP8]] +// X86WINPRMTD-NEXT: [[TMP10:%.*]] = fdiv double [[TMP9]], [[TMP4]] +// X86WINPRMTD-NEXT: br label [[COMPLEX_DIV:%.*]] +// X86WINPRMTD: abs_rhsr_less_than_abs_rhsi: +// X86WINPRMTD-NEXT: [[TMP11:%.*]] = fdiv double [[B_REAL]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[TMP12:%.*]] = fmul double [[TMP11]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[TMP13:%.*]] = fadd double [[B_IMAG]], [[TMP12]] +// X86WINPRMTD-NEXT: [[TMP14:%.*]] = fmul double [[A_REAL]], [[TMP11]] +// X86WINPRMTD-NEXT: [[TMP15:%.*]] = fadd double [[TMP14]], [[A_IMAG]] +// X86WINPRMTD-NEXT: [[TMP16:%.*]] = fdiv double [[TMP15]], [[TMP13]] +// X86WINPRMTD-NEXT: [[TMP17:%.*]] = fmul double [[A_IMAG]], [[TMP11]] +// X86WINPRMTD-NEXT: [[TMP18:%.*]] = fsub double [[TMP17]], [[A_REAL]] +// X86WINPRMTD-NEXT: [[TMP19:%.*]] = fdiv double [[TMP18]], [[TMP13]] +// X86WINPRMTD-NEXT: br label [[COMPLEX_DIV]] +// X86WINPRMTD: complex_div: +// X86WINPRMTD-NEXT: [[TMP20:%.*]] = phi double [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD-NEXT: [[TMP21:%.*]] = phi double [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store double [[TMP20]], ptr [[AGG_RESULT_REALP]], align 8 +// X86WINPRMTD-NEXT: store double [[TMP21]], ptr [[AGG_RESULT_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr 
[[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 8 +// X86WINPRMTD-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 8 +// X86WINPRMTD-NEXT: ret void +// +// AVRFP32-LABEL: define dso_local { float, float } @divld( +// AVRFP32-SAME: float noundef [[A_COERCE0:%.*]], float noundef [[A_COERCE1:%.*]], float noundef [[B_COERCE0:%.*]], float noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP32-NEXT: entry: +// AVRFP32-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[A:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[B:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[TMP0:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[A_COERCE0]], ptr [[TMP0]], align 1 +// AVRFP32-NEXT: [[TMP1:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[A_COERCE1]], ptr [[TMP1]], align 1 +// AVRFP32-NEXT: [[TMP2:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[B_COERCE0]], ptr [[TMP2]], align 1 +// AVRFP32-NEXT: [[TMP3:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[B_COERCE1]], ptr [[TMP3]], align 1 +// AVRFP32-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 1 +// AVRFP32-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// 
AVRFP32-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 1 +// AVRFP32-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 1 +// AVRFP32-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP4:%.*]] = call addrspace(1) float @llvm.fabs.f32(float [[B_REAL]]) +// AVRFP32-NEXT: [[TMP5:%.*]] = call addrspace(1) float @llvm.fabs.f32(float [[B_IMAG]]) +// AVRFP32-NEXT: [[ABS_CMP:%.*]] = fcmp ugt float [[TMP4]], [[TMP5]] +// AVRFP32-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// AVRFP32: abs_rhsr_greater_or_equal_abs_rhsi: +// AVRFP32-NEXT: [[TMP6:%.*]] = fdiv float [[B_IMAG]], [[B_REAL]] +// AVRFP32-NEXT: [[TMP7:%.*]] = fmul float [[TMP6]], [[B_IMAG]] +// AVRFP32-NEXT: [[TMP8:%.*]] = fadd float [[B_REAL]], [[TMP7]] +// AVRFP32-NEXT: [[TMP9:%.*]] = fmul float [[A_IMAG]], [[TMP6]] +// AVRFP32-NEXT: [[TMP10:%.*]] = fadd float [[A_REAL]], [[TMP9]] +// AVRFP32-NEXT: [[TMP11:%.*]] = fdiv float [[TMP10]], [[TMP8]] +// AVRFP32-NEXT: [[TMP12:%.*]] = fmul float [[A_REAL]], [[TMP6]] +// AVRFP32-NEXT: [[TMP13:%.*]] = fsub float [[A_IMAG]], [[TMP12]] +// AVRFP32-NEXT: [[TMP14:%.*]] = fdiv float [[TMP13]], [[TMP8]] +// AVRFP32-NEXT: br label [[COMPLEX_DIV:%.*]] +// AVRFP32: abs_rhsr_less_than_abs_rhsi: +// AVRFP32-NEXT: [[TMP15:%.*]] = fdiv float [[B_REAL]], [[B_IMAG]] +// AVRFP32-NEXT: [[TMP16:%.*]] = fmul float [[TMP15]], [[B_REAL]] +// AVRFP32-NEXT: [[TMP17:%.*]] = fadd float [[B_IMAG]], [[TMP16]] +// AVRFP32-NEXT: [[TMP18:%.*]] = fmul float [[A_REAL]], [[TMP15]] +// AVRFP32-NEXT: [[TMP19:%.*]] = fadd float [[TMP18]], [[A_IMAG]] +// AVRFP32-NEXT: [[TMP20:%.*]] = fdiv float [[TMP19]], [[TMP17]] +// AVRFP32-NEXT: [[TMP21:%.*]] = fmul float [[A_IMAG]], [[TMP15]] +// 
AVRFP32-NEXT: [[TMP22:%.*]] = fsub float [[TMP21]], [[A_REAL]] +// AVRFP32-NEXT: [[TMP23:%.*]] = fdiv float [[TMP22]], [[TMP17]] +// AVRFP32-NEXT: br label [[COMPLEX_DIV]] +// AVRFP32: complex_div: +// AVRFP32-NEXT: [[TMP24:%.*]] = phi float [ [[TMP11]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP20]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP32-NEXT: [[TMP25:%.*]] = phi float [ [[TMP14]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP23]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP32-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// AVRFP32-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[TMP24]], ptr [[RETVAL_REALP]], align 1 +// AVRFP32-NEXT: store float [[TMP25]], ptr [[RETVAL_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP26:%.*]] = load { float, float }, ptr [[RETVAL]], align 1 +// AVRFP32-NEXT: ret { float, float } [[TMP26]] +// +// AVRFP64-LABEL: define dso_local void @divld( +// AVRFP64-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 1 [[AGG_RESULT:%.*]], double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP64-NEXT: entry: +// AVRFP64-NEXT: [[A:%.*]] = alloca { double, double }, align 1 +// AVRFP64-NEXT: [[B:%.*]] = alloca { double, double }, align 1 +// AVRFP64-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 1 +// AVRFP64-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 1 +// AVRFP64-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 1 +// AVRFP64-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 1 +// AVRFP64-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 1 +// AVRFP64-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 1 +// AVRFP64-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 1 +// AVRFP64-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 1 +// AVRFP64-NEXT: [[TMP4:%.*]] = call addrspace(1) double @llvm.fabs.f64(double [[B_REAL]]) +// AVRFP64-NEXT: [[TMP5:%.*]] = call addrspace(1) double @llvm.fabs.f64(double [[B_IMAG]]) +// AVRFP64-NEXT: [[ABS_CMP:%.*]] = fcmp ugt double [[TMP4]], [[TMP5]] +// AVRFP64-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// AVRFP64: abs_rhsr_greater_or_equal_abs_rhsi: +// AVRFP64-NEXT: [[TMP6:%.*]] = fdiv double [[B_IMAG]], [[B_REAL]] +// AVRFP64-NEXT: [[TMP7:%.*]] = fmul double [[TMP6]], [[B_IMAG]] +// AVRFP64-NEXT: [[TMP8:%.*]] = fadd double [[B_REAL]], [[TMP7]] +// AVRFP64-NEXT: [[TMP9:%.*]] = fmul double [[A_IMAG]], [[TMP6]] +// AVRFP64-NEXT: [[TMP10:%.*]] = fadd double [[A_REAL]], [[TMP9]] +// AVRFP64-NEXT: [[TMP11:%.*]] = fdiv double [[TMP10]], [[TMP8]] +// AVRFP64-NEXT: [[TMP12:%.*]] = fmul double [[A_REAL]], [[TMP6]] +// AVRFP64-NEXT: [[TMP13:%.*]] = fsub double [[A_IMAG]], [[TMP12]] +// AVRFP64-NEXT: [[TMP14:%.*]] = fdiv double [[TMP13]], [[TMP8]] +// AVRFP64-NEXT: br label [[COMPLEX_DIV:%.*]] +// AVRFP64: abs_rhsr_less_than_abs_rhsi: +// AVRFP64-NEXT: [[TMP15:%.*]] = fdiv 
double [[B_REAL]], [[B_IMAG]] +// AVRFP64-NEXT: [[TMP16:%.*]] = fmul double [[TMP15]], [[B_REAL]] +// AVRFP64-NEXT: [[TMP17:%.*]] = fadd double [[B_IMAG]], [[TMP16]] +// AVRFP64-NEXT: [[TMP18:%.*]] = fmul double [[A_REAL]], [[TMP15]] +// AVRFP64-NEXT: [[TMP19:%.*]] = fadd double [[TMP18]], [[A_IMAG]] +// AVRFP64-NEXT: [[TMP20:%.*]] = fdiv double [[TMP19]], [[TMP17]] +// AVRFP64-NEXT: [[TMP21:%.*]] = fmul double [[A_IMAG]], [[TMP15]] +// AVRFP64-NEXT: [[TMP22:%.*]] = fsub double [[TMP21]], [[A_REAL]] +// AVRFP64-NEXT: [[TMP23:%.*]] = fdiv double [[TMP22]], [[TMP17]] +// AVRFP64-NEXT: br label [[COMPLEX_DIV]] +// AVRFP64: complex_div: +// AVRFP64-NEXT: [[TMP24:%.*]] = phi double [ [[TMP11]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP20]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP64-NEXT: [[TMP25:%.*]] = phi double [ [[TMP14]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP23]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP64-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[TMP24]], ptr [[AGG_RESULT_REALP]], align 1 +// AVRFP64-NEXT: store double [[TMP25]], ptr [[AGG_RESULT_IMAGP]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 
1 +// AVRFP64-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 1 +// AVRFP64-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 1 +// AVRFP64-NEXT: ret void +// +// BASIC_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld( +// BASIC_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// BASIC_FAST-NEXT: entry: +// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// BASIC_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// BASIC_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// BASIC_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// BASIC_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// BASIC_FAST-NEXT: [[TMP0:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[B_REAL]] +// BASIC_FAST-NEXT: [[TMP1:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[TMP2:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP0]], [[TMP1]] +// BASIC_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[B_REAL]] +// BASIC_FAST-NEXT: [[TMP4:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[TMP5:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP3]], [[TMP4]] +// BASIC_FAST-NEXT: [[TMP6:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 
[[A_IMAG]], [[B_REAL]] +// BASIC_FAST-NEXT: [[TMP7:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[TMP8:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP6]], [[TMP7]] +// BASIC_FAST-NEXT: [[TMP9:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP2]], [[TMP5]] +// BASIC_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP8]], [[TMP5]] +// BASIC_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC_FAST-NEXT: store x86_fp80 [[TMP9]], ptr [[RETVAL_REALP]], align 16 +// BASIC_FAST-NEXT: store x86_fp80 [[TMP10]], ptr [[RETVAL_IMAGP]], align 16 +// BASIC_FAST-NEXT: [[TMP11:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// BASIC_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP11]] +// +// FULL_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld( +// FULL_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// FULL_FAST-NEXT: entry: +// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// FULL_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// FULL_FAST-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// FULL_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// FULL_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: 
[[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] +// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0 +// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 +// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// FULL_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// FULL_FAST-NEXT: store x86_fp80 [[TMP0]], ptr [[RETVAL_REALP]], align 16 +// FULL_FAST-NEXT: store x86_fp80 [[TMP1]], ptr [[RETVAL_IMAGP]], align 16 +// FULL_FAST-NEXT: [[TMP2:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// FULL_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP2]] +// +// IMPRVD_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld( +// IMPRVD_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR2]] { +// IMPRVD_FAST-NEXT: entry: +// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// IMPRVD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// IMPRVD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// IMPRVD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// IMPRVD_FAST-NEXT: [[B_IMAGP:%.*]] = 
getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) +// IMPRVD_FAST-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) +// IMPRVD_FAST-NEXT: [[ABS_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt x86_fp80 [[TMP0]], [[TMP1]] +// IMPRVD_FAST-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD_FAST: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP2:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP2]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP4:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP3]] +// IMPRVD_FAST-NEXT: [[TMP5:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[TMP2]] +// IMPRVD_FAST-NEXT: [[TMP6:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[TMP5]] +// IMPRVD_FAST-NEXT: [[TMP7:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP6]], [[TMP4]] +// IMPRVD_FAST-NEXT: [[TMP8:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[TMP2]] +// IMPRVD_FAST-NEXT: [[TMP9:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[TMP8]] +// IMPRVD_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP9]], [[TMP4]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD_FAST: abs_rhsr_less_than_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP11:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP11]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[TMP13:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 
[[B_IMAG]], [[TMP12]] +// IMPRVD_FAST-NEXT: [[TMP14:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[TMP11]] +// IMPRVD_FAST-NEXT: [[TMP15:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP14]], [[A_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP16:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP15]], [[TMP13]] +// IMPRVD_FAST-NEXT: [[TMP17:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[TMP11]] +// IMPRVD_FAST-NEXT: [[TMP18:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP17]], [[A_REAL]] +// IMPRVD_FAST-NEXT: [[TMP19:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP18]], [[TMP13]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD_FAST: complex_div: +// IMPRVD_FAST-NEXT: [[TMP20:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[TMP21:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store x86_fp80 [[TMP20]], ptr [[RETVAL_REALP]], align 16 +// IMPRVD_FAST-NEXT: store x86_fp80 [[TMP21]], ptr [[RETVAL_IMAGP]], align 16 +// IMPRVD_FAST-NEXT: [[TMP22:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// IMPRVD_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP22]] +// +// PRMTD_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld( +// PRMTD_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// PRMTD_FAST-NEXT: entry: +// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// 
PRMTD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// PRMTD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// PRMTD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD_FAST-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) +// PRMTD_FAST-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) +// PRMTD_FAST-NEXT: [[ABS_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt x86_fp80 [[TMP0]], [[TMP1]] +// PRMTD_FAST-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// PRMTD_FAST: abs_rhsr_greater_or_equal_abs_rhsi: +// PRMTD_FAST-NEXT: [[TMP2:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[B_REAL]] +// PRMTD_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP2]], [[B_IMAG]] +// PRMTD_FAST-NEXT: [[TMP4:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP3]] +// PRMTD_FAST-NEXT: [[TMP5:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[TMP2]] +// PRMTD_FAST-NEXT: [[TMP6:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[TMP5]] +// PRMTD_FAST-NEXT: [[TMP7:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP6]], [[TMP4]] +// PRMTD_FAST-NEXT: [[TMP8:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[TMP2]] +// 
PRMTD_FAST-NEXT: [[TMP9:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[TMP8]] +// PRMTD_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP9]], [[TMP4]] +// PRMTD_FAST-NEXT: br label [[COMPLEX_DIV:%.*]] +// PRMTD_FAST: abs_rhsr_less_than_abs_rhsi: +// PRMTD_FAST-NEXT: [[TMP11:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[B_IMAG]] +// PRMTD_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP11]], [[B_REAL]] +// PRMTD_FAST-NEXT: [[TMP13:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP12]] +// PRMTD_FAST-NEXT: [[TMP14:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[TMP11]] +// PRMTD_FAST-NEXT: [[TMP15:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP14]], [[A_IMAG]] +// PRMTD_FAST-NEXT: [[TMP16:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP15]], [[TMP13]] +// PRMTD_FAST-NEXT: [[TMP17:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[TMP11]] +// PRMTD_FAST-NEXT: [[TMP18:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP17]], [[A_REAL]] +// PRMTD_FAST-NEXT: [[TMP19:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP18]], [[TMP13]] +// PRMTD_FAST-NEXT: br label [[COMPLEX_DIV]] +// PRMTD_FAST: complex_div: +// PRMTD_FAST-NEXT: [[TMP20:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD_FAST-NEXT: [[TMP21:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store x86_fp80 [[TMP20]], ptr [[RETVAL_REALP]], align 16 +// 
PRMTD_FAST-NEXT: store x86_fp80 [[TMP21]], ptr [[RETVAL_IMAGP]], align 16 +// PRMTD_FAST-NEXT: [[TMP22:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// PRMTD_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP22]] +// _Complex long double divld(_Complex long double a, _Complex long double b) { - // LABEL: define {{.*}} @divld( - // FULL: call {{.*}} @__divxc3 - // FULL_FAST: call {{.*}} @__divxc3 - // - // BASIC: fmul{{.*}}x86_fp80 - // BASIC-NEXT: fmul{{.*}}x86_fp80 - // BASIC-NEXT: fadd{{.*}}x86_fp80 - // BASIC-NEXT: fmul{{.*}}x86_fp80 - // BASIC-NEXT: fmul{{.*}}x86_fp80 - // BASIC-NEXT: fadd{{.*}}x86_fp80 - // BASIC-NEXT: fmul{{.*}}x86_fp80 - // BASIC-NEXT: fmul{{.*}}x86_fp80 - // BASIC-NEXT: fsub{{.*}}x86_fp80 - // BASIC-NEXT: fdiv{{.*}}x86_fp80 - // BASIC-NEXT: fdiv{{.*}}x86_fp80 - // - // IMPRVD: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) - // IMPRVD-NEXT: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) - // IMPRVD-NEXT: fcmp{{.*}}ugt x86_fp80 {{.*}}, {{.*}} - // IMPRVD-NEXT: br i1 {{.*}}, label - // IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: - // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 - // IMPRVD-NEXT: fmul{{.*}}x86_fp80 - // IMPRVD-NEXT: fadd{{.*}}x86_fp80 - // IMPRVD-NEXT: fmul{{.*}}x86_fp80 - // IMPRVD-NEXT: fadd{{.*}}x86_fp80 - // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 - // IMPRVD-NEXT: fmul{{.*}}x86_fp80 - // IMPRVD-NEXT: fsub{{.*}}x86_fp80 - // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 - // IMPRVD-NEXT: br label - // IMPRVD: abs_rhsr_less_than_abs_rhsi: - // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 - // IMPRVD-NEXT: fmul{{.*}}x86_fp80 - // IMPRVD-NEXT: fadd{{.*}}x86_fp80 - // IMPRVD-NEXT: fmul{{.*}}x86_fp80 - // IMPRVD-NEXT: fadd{{.*}}x86_fp80 - // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 - // IMPRVD-NEXT: fmul{{.*}}x86_fp80 - // IMPRVD-NEXT: fsub{{.*}}x86_fp80 - // IMPRVD-NEXT: fdiv{{.*}}x86_fp80 - // - // PRMTD: alloca { x86_fp80, x86_fp80 } - // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load x86_fp80, ptr {{.*}} 
- // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load x86_fp80, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load x86_fp80, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 } - // PRMTD-NEXT: load x86_fp80, ptr {{.*}} - // PRMTD-NEXT: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) - // PRMTD-NEXT: call{{.*}}x86_fp80 @llvm.fabs.f80(x86_fp80 {{.*}}) - // PRMTD-NEXT: fcmp{{.*}}ugt x86_fp80 {{.*}},{{.*}} - // PRMTD-NEXT: br i1 {{.*}}, label {{.*}}, label {{.*}} - // PRMTD: abs_rhsr_greater_or_equal_abs_rhsi: - // PRMTD-NEXT: fdiv{{.*}} x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fadd{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fadd{{.*}}x86_fp80 - // PRMTD-NEXT: fdiv{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fsub{{.*}}x86_fp80 - // PRMTD-NEXT: fdiv{{.*}}x86_fp80 - // PRMTD-NEXT: br label - // PRMTD: abs_rhsr_less_than_abs_rhsi: - // PRMTD-NEXT: fdiv{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fadd{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fadd{{.*}}x86_fp80 - // PRMTD-NEXT: fdiv{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fsub{{.*}}x86_fp80 - // PRMTD-NEXT: fdiv {{.*}}x86_fp80 - return a / b; } +// FULL-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld( +// FULL-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// FULL-NEXT: entry: +// FULL-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// FULL-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// FULL-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// 
FULL-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// FULL-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// FULL-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// FULL-NEXT: [[MUL_AC:%.*]] = fmul x86_fp80 [[A_REAL]], [[B_REAL]] +// FULL-NEXT: [[MUL_BD:%.*]] = fmul x86_fp80 [[A_IMAG]], [[B_IMAG]] +// FULL-NEXT: [[MUL_AD:%.*]] = fmul x86_fp80 [[A_REAL]], [[B_IMAG]] +// FULL-NEXT: [[MUL_BC:%.*]] = fmul x86_fp80 [[A_IMAG]], [[B_REAL]] +// FULL-NEXT: [[MUL_R:%.*]] = fsub x86_fp80 [[MUL_AC]], [[MUL_BD]] +// FULL-NEXT: [[MUL_I:%.*]] = fadd x86_fp80 [[MUL_AD]], [[MUL_BC]] +// FULL-NEXT: [[ISNAN_CMP:%.*]] = fcmp uno x86_fp80 [[MUL_R]], [[MUL_R]] +// FULL-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2]] +// FULL: complex_mul_imag_nan: +// FULL-NEXT: [[ISNAN_CMP1:%.*]] = fcmp uno x86_fp80 [[MUL_I]], [[MUL_I]] +// FULL-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// FULL: complex_mul_libcall: +// FULL-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__mulxc3(x86_fp80 noundef [[A_REAL]], x86_fp80 noundef [[A_IMAG]], x86_fp80 noundef [[B_REAL]], x86_fp80 noundef [[B_IMAG]]) #[[ATTR2]] +// FULL-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0 +// FULL-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 +// FULL-NEXT: br label [[COMPLEX_MUL_CONT]] +// FULL: complex_mul_cont: +// FULL-NEXT: [[REAL_MUL_PHI:%.*]] = phi x86_fp80 [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP0]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL-NEXT: [[IMAG_MUL_PHI:%.*]] = phi x86_fp80 [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP1]], 
[[COMPLEX_MUL_LIBCALL]] ] +// FULL-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// FULL-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// FULL-NEXT: store x86_fp80 [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 16 +// FULL-NEXT: store x86_fp80 [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 16 +// FULL-NEXT: [[TMP2:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// FULL-NEXT: ret { x86_fp80, x86_fp80 } [[TMP2]] +// +// BASIC-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld( +// BASIC-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// BASIC-NEXT: entry: +// BASIC-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// BASIC-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// BASIC-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// BASIC-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// BASIC-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// BASIC-NEXT: [[MUL_AC:%.*]] = fmul x86_fp80 [[A_REAL]], [[B_REAL]] +// BASIC-NEXT: [[MUL_BD:%.*]] = fmul x86_fp80 [[A_IMAG]], [[B_IMAG]] +// BASIC-NEXT: [[MUL_AD:%.*]] = fmul x86_fp80 [[A_REAL]], [[B_IMAG]] +// BASIC-NEXT: [[MUL_BC:%.*]] = fmul x86_fp80 [[A_IMAG]], [[B_REAL]] +// BASIC-NEXT: [[MUL_R:%.*]] = fsub x86_fp80 [[MUL_AC]], [[MUL_BD]] +// BASIC-NEXT: [[MUL_I:%.*]] = fadd x86_fp80 [[MUL_AD]], [[MUL_BC]] 
+// BASIC-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC-NEXT: store x86_fp80 [[MUL_R]], ptr [[RETVAL_REALP]], align 16 +// BASIC-NEXT: store x86_fp80 [[MUL_I]], ptr [[RETVAL_IMAGP]], align 16 +// BASIC-NEXT: [[TMP0:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// BASIC-NEXT: ret { x86_fp80, x86_fp80 } [[TMP0]] +// +// IMPRVD-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld( +// IMPRVD-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR2]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// IMPRVD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// IMPRVD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// IMPRVD-NEXT: [[MUL_AC:%.*]] = fmul x86_fp80 [[A_REAL]], [[B_REAL]] +// IMPRVD-NEXT: [[MUL_BD:%.*]] = fmul x86_fp80 [[A_IMAG]], [[B_IMAG]] +// IMPRVD-NEXT: [[MUL_AD:%.*]] = fmul x86_fp80 [[A_REAL]], [[B_IMAG]] +// IMPRVD-NEXT: [[MUL_BC:%.*]] = fmul x86_fp80 [[A_IMAG]], [[B_REAL]] +// IMPRVD-NEXT: [[MUL_R:%.*]] = fsub x86_fp80 [[MUL_AC]], [[MUL_BD]] +// IMPRVD-NEXT: [[MUL_I:%.*]] = fadd x86_fp80 [[MUL_AD]], [[MUL_BC]] +// IMPRVD-NEXT: 
[[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD-NEXT: store x86_fp80 [[MUL_R]], ptr [[RETVAL_REALP]], align 16 +// IMPRVD-NEXT: store x86_fp80 [[MUL_I]], ptr [[RETVAL_IMAGP]], align 16 +// IMPRVD-NEXT: [[TMP0:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// IMPRVD-NEXT: ret { x86_fp80, x86_fp80 } [[TMP0]] +// +// PRMTD-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld( +// PRMTD-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// PRMTD-NEXT: entry: +// PRMTD-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// PRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// PRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// PRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD-NEXT: [[MUL_AC:%.*]] = fmul x86_fp80 [[A_REAL]], [[B_REAL]] +// PRMTD-NEXT: [[MUL_BD:%.*]] = fmul x86_fp80 [[A_IMAG]], [[B_IMAG]] +// PRMTD-NEXT: [[MUL_AD:%.*]] = fmul x86_fp80 [[A_REAL]], [[B_IMAG]] +// PRMTD-NEXT: [[MUL_BC:%.*]] = fmul x86_fp80 [[A_IMAG]], [[B_REAL]] +// PRMTD-NEXT: [[MUL_R:%.*]] = fsub x86_fp80 [[MUL_AC]], [[MUL_BD]] +// PRMTD-NEXT: [[MUL_I:%.*]] = fadd x86_fp80 [[MUL_AD]], [[MUL_BC]] +// PRMTD-NEXT: [[RETVAL_REALP:%.*]] = 
getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD-NEXT: store x86_fp80 [[MUL_R]], ptr [[RETVAL_REALP]], align 16 +// PRMTD-NEXT: store x86_fp80 [[MUL_I]], ptr [[RETVAL_IMAGP]], align 16 +// PRMTD-NEXT: [[TMP0:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// PRMTD-NEXT: ret { x86_fp80, x86_fp80 } [[TMP0]] +// +// X86WINPRMTD-LABEL: define dso_local void @mulld( +// X86WINPRMTD-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] { +// X86WINPRMTD-NEXT: entry: +// X86WINPRMTD-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: [[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: [[A_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// X86WINPRMTD-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD-NEXT: store ptr [[A]], ptr [[A_INDIRECT_ADDR]], align 8 +// X86WINPRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// X86WINPRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[MUL_AC:%.*]] = fmul double [[A_REAL]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[MUL_BD:%.*]] = 
fmul double [[A_IMAG]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[MUL_AD:%.*]] = fmul double [[A_REAL]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[MUL_BC:%.*]] = fmul double [[A_IMAG]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[MUL_R:%.*]] = fsub double [[MUL_AC]], [[MUL_BD]] +// X86WINPRMTD-NEXT: [[MUL_I:%.*]] = fadd double [[MUL_AD]], [[MUL_BC]] +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store double [[MUL_R]], ptr [[AGG_RESULT_REALP]], align 8 +// X86WINPRMTD-NEXT: store double [[MUL_I]], ptr [[AGG_RESULT_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 8 +// X86WINPRMTD-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 8 +// X86WINPRMTD-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 8 +// X86WINPRMTD-NEXT: ret void +// +// AVRFP32-LABEL: define dso_local { float, float } @mulld( +// AVRFP32-SAME: float noundef [[A_COERCE0:%.*]], float noundef [[A_COERCE1:%.*]], float noundef [[B_COERCE0:%.*]], float noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP32-NEXT: entry: +// AVRFP32-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: 
[[A:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[B:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[TMP0:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[A_COERCE0]], ptr [[TMP0]], align 1 +// AVRFP32-NEXT: [[TMP1:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[A_COERCE1]], ptr [[TMP1]], align 1 +// AVRFP32-NEXT: [[TMP2:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[B_COERCE0]], ptr [[TMP2]], align 1 +// AVRFP32-NEXT: [[TMP3:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[B_COERCE1]], ptr [[TMP3]], align 1 +// AVRFP32-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 1 +// AVRFP32-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 1 +// AVRFP32-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 1 +// AVRFP32-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 1 +// AVRFP32-NEXT: [[MUL_AC:%.*]] = fmul float [[A_REAL]], [[B_REAL]] +// AVRFP32-NEXT: [[MUL_BD:%.*]] = fmul float [[A_IMAG]], [[B_IMAG]] +// AVRFP32-NEXT: [[MUL_AD:%.*]] = fmul float [[A_REAL]], [[B_IMAG]] +// AVRFP32-NEXT: [[MUL_BC:%.*]] = fmul float [[A_IMAG]], [[B_REAL]] +// AVRFP32-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// AVRFP32-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// AVRFP32-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// 
AVRFP32-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 1 +// AVRFP32-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP4:%.*]] = load { float, float }, ptr [[RETVAL]], align 1 +// AVRFP32-NEXT: ret { float, float } [[TMP4]] +// +// AVRFP64-LABEL: define dso_local void @mulld( +// AVRFP64-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 1 [[AGG_RESULT:%.*]], double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP64-NEXT: entry: +// AVRFP64-NEXT: [[A:%.*]] = alloca { double, double }, align 1 +// AVRFP64-NEXT: [[B:%.*]] = alloca { double, double }, align 1 +// AVRFP64-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 1 +// AVRFP64-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 1 +// AVRFP64-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 1 +// AVRFP64-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 1 +// AVRFP64-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 1 +// AVRFP64-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 1 +// AVRFP64-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: 
[[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 1 +// AVRFP64-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 1 +// AVRFP64-NEXT: [[MUL_AC:%.*]] = fmul double [[A_REAL]], [[B_REAL]] +// AVRFP64-NEXT: [[MUL_BD:%.*]] = fmul double [[A_IMAG]], [[B_IMAG]] +// AVRFP64-NEXT: [[MUL_AD:%.*]] = fmul double [[A_REAL]], [[B_IMAG]] +// AVRFP64-NEXT: [[MUL_BC:%.*]] = fmul double [[A_IMAG]], [[B_REAL]] +// AVRFP64-NEXT: [[MUL_R:%.*]] = fsub double [[MUL_AC]], [[MUL_BD]] +// AVRFP64-NEXT: [[MUL_I:%.*]] = fadd double [[MUL_AD]], [[MUL_BC]] +// AVRFP64-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[MUL_R]], ptr [[AGG_RESULT_REALP]], align 1 +// AVRFP64-NEXT: store double [[MUL_I]], ptr [[AGG_RESULT_IMAGP]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 1 +// AVRFP64-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// AVRFP64-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 1 +// AVRFP64-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 1 +// AVRFP64-NEXT: ret void +// +// BASIC_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld( +// 
BASIC_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// BASIC_FAST-NEXT: entry: +// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// BASIC_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// BASIC_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// BASIC_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// BASIC_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// BASIC_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[B_REAL]] +// BASIC_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[B_IMAG]] +// BASIC_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[B_REAL]] +// BASIC_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[MUL_AC]], [[MUL_BD]] +// BASIC_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[MUL_AD]], [[MUL_BC]] +// BASIC_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC_FAST-NEXT: store x86_fp80 [[MUL_R]], ptr [[RETVAL_REALP]], align 16 +// BASIC_FAST-NEXT: store 
x86_fp80 [[MUL_I]], ptr [[RETVAL_IMAGP]], align 16 +// BASIC_FAST-NEXT: [[TMP0:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// BASIC_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP0]] +// +// FULL_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld( +// FULL_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// FULL_FAST-NEXT: entry: +// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// FULL_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// FULL_FAST-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// FULL_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// FULL_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// FULL_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[B_REAL]] +// FULL_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[B_IMAG]] +// FULL_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[B_IMAG]] +// FULL_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[B_REAL]] +// FULL_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[MUL_AC]], [[MUL_BD]] +// FULL_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[MUL_AD]], [[MUL_BC]] +// FULL_FAST-NEXT: [[ISNAN_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno x86_fp80 
[[MUL_R]], [[MUL_R]] +// FULL_FAST-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2]] +// FULL_FAST: complex_mul_imag_nan: +// FULL_FAST-NEXT: [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno x86_fp80 [[MUL_I]], [[MUL_I]] +// FULL_FAST-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// FULL_FAST: complex_mul_libcall: +// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__mulxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] +// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0 +// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 +// FULL_FAST-NEXT: br label [[COMPLEX_MUL_CONT]] +// FULL_FAST: complex_mul_cont: +// FULL_FAST-NEXT: [[REAL_MUL_PHI:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP0]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL_FAST-NEXT: [[IMAG_MUL_PHI:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[TMP1]], [[COMPLEX_MUL_LIBCALL]] ] +// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// FULL_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// FULL_FAST-NEXT: store x86_fp80 [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 16 +// FULL_FAST-NEXT: store x86_fp80 [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 16 +// FULL_FAST-NEXT: [[TMP2:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// FULL_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP2]] +// +// IMPRVD_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld( +// 
IMPRVD_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR2]] { +// IMPRVD_FAST-NEXT: entry: +// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// IMPRVD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// IMPRVD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// IMPRVD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// IMPRVD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// IMPRVD_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[MUL_AC]], [[MUL_BD]] +// IMPRVD_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[MUL_AD]], [[MUL_BC]] +// IMPRVD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store x86_fp80 [[MUL_R]], ptr [[RETVAL_REALP]], align 16 +// 
IMPRVD_FAST-NEXT: store x86_fp80 [[MUL_I]], ptr [[RETVAL_IMAGP]], align 16 +// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// IMPRVD_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP0]] +// +// PRMTD_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld( +// PRMTD_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] { +// PRMTD_FAST-NEXT: entry: +// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// PRMTD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// PRMTD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// PRMTD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD_FAST-NEXT: [[MUL_AC:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[B_REAL]] +// PRMTD_FAST-NEXT: [[MUL_BD:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[B_IMAG]] +// PRMTD_FAST-NEXT: [[MUL_AD:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_REAL]], [[B_IMAG]] +// PRMTD_FAST-NEXT: [[MUL_BC:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[A_IMAG]], [[B_REAL]] +// PRMTD_FAST-NEXT: [[MUL_R:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[MUL_AC]], [[MUL_BD]] +// PRMTD_FAST-NEXT: [[MUL_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[MUL_AD]], [[MUL_BC]] +// PRMTD_FAST-NEXT: [[RETVAL_REALP:%.*]] = 
getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store x86_fp80 [[MUL_R]], ptr [[RETVAL_REALP]], align 16 +// PRMTD_FAST-NEXT: store x86_fp80 [[MUL_I]], ptr [[RETVAL_IMAGP]], align 16 +// PRMTD_FAST-NEXT: [[TMP0:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// PRMTD_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP0]] +// _Complex long double mulld(_Complex long double a, _Complex long double b) { - // LABEL: define {{.*}} @mulld( - // FULL: call {{.*}} @__mulxc3 - - // FULL_FAST: alloca { x86_fp80, x86_fp80 } - // FULL_FAST-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 - // FULL_FAST-NEXT: load x86_fp80, ptr {{.*}} - // FULL_FAST-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 - // FULL_FAST-NEXT: load x86_fp80, ptr {{.*}} - // FULL_FAST-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 - // FULL_FAST-NEXT: load x86_fp80 - // FULL_FAST-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 - // FULL_FAST-NEXT: load x86_fp80, ptr {{.*}} - // FULL_FAST-NEXT: fmul{{.*}}x86_fp80 - // FULL_FAST-NEXT: fmul{{.*}}x86_fp80 - // FULL_FAST-NEXT: fmul{{.*}}x86_fp80 - // FULL_FAST-NEXT: fmul{{.*}}x86_fp80 - // FULL_FAST-NEXT: fsub{{.*}}x86_fp80 - // FULL_FAST-NEXT: fadd{{.*}}x86_fp80 - - // BASIC: alloca { x86_fp80, x86_fp80 } - // BASIC-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 - // BASIC-NEXT: load x86_fp80, ptr {{.*}} - // BASIC-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 - // BASIC-NEXT: load x86_fp80, ptr {{.*}} - // BASIC-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 - // BASIC-NEXT: load x86_fp80 - // BASIC-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 
- // BASIC-NEXT: load x86_fp80, ptr {{.*}} - // BASIC-NEXT: fmul{{.*}}x86_fp80 - // BASIC-NEXT: fmul{{.*}}x86_fp80 - // BASIC-NEXT: fmul{{.*}}x86_fp80 - // BASIC-NEXT: fmul{{.*}}x86_fp80 - // BASIC-NEXT: fsub{{.*}}x86_fp80 - // BASIC-NEXT: fadd{{.*}}x86_fp80 - // - // IMPRVD: alloca { x86_fp80, x86_fp80 } - // IMPRVD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 - // IMPRVD-NEXT: load x86_fp80, ptr {{.*}} - // IMPRVD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 - // IMPRVD-NEXT: load x86_fp80, ptr {{.*}} - // IMPRVD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 - // IMPRVD-NEXT: load x86_fp80 - // IMPRVD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 - // IMPRVD-NEXT: load x86_fp80, ptr {{.*}} - // IMPRVD-NEXT: fmul{{.*}}x86_fp80 - // IMPRVD-NEXT: fmul{{.*}}x86_fp80 - // IMPRVD-NEXT: fmul{{.*}}x86_fp80 - // IMPRVD-NEXT: fmul{{.*}}x86_fp80 - // IMPRVD-NEXT: fsub{{.*}}x86_fp80 - // IMPRVD-NEXT: fadd{{.*}}x86_fp80 - // - // PRMTD: alloca { x86_fp80, x86_fp80 } - // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load x86_fp80, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load x86_fp80, ptr {{.*}} - // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 0 - // PRMTD-NEXT: load{{.*}}x86_fp80 - // PRMTD-NEXT: getelementptr inbounds { x86_fp80, x86_fp80 }, ptr {{.*}}, i32 0, i32 1 - // PRMTD-NEXT: load x86_fp80, ptr {{.*}} - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fmul{{.*}}x86_fp80 - // PRMTD-NEXT: fsub{{.*}}x86_fp80 - // PRMTD-NEXT: fadd{{.*}}x86_fp80 - return a * b; } + +// FULL-LABEL: define dso_local <2 x float> @f1( +// FULL-SAME: <2 x float> noundef [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 
}) align 16 [[B:%.*]], <2 x float> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// FULL-NEXT: entry: +// FULL-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// FULL-NEXT: store <2 x float> [[C_COERCE]], ptr [[C]], align 4 +// FULL-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// FULL-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// FULL-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// FULL-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// FULL-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// FULL-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// FULL-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// FULL-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// FULL-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80 +// FULL-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80 +// FULL-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef [[B_REAL]], x86_fp80 noundef [[B_IMAG]], x86_fp80 noundef [[CONV]], x86_fp80 noundef [[CONV1]]) #[[ATTR2]] +// FULL-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0 +// FULL-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 +// FULL-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to float +// FULL-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP1]] to float +// FULL-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// FULL-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// FULL-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds 
{ float, float }, ptr [[A]], i32 0, i32 1 +// FULL-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// FULL-NEXT: [[CALL4:%.*]] = call <2 x float> @__divsc3(float noundef [[CONV2]], float noundef [[CONV3]], float noundef [[A_REAL]], float noundef [[A_IMAG]]) #[[ATTR2]] +// FULL-NEXT: store <2 x float> [[CALL4]], ptr [[COERCE]], align 4 +// FULL-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// FULL-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// FULL-NEXT: store float [[COERCE_REAL]], ptr [[RETVAL_REALP]], align 4 +// FULL-NEXT: store float [[COERCE_IMAG]], ptr [[RETVAL_IMAGP]], align 4 +// FULL-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// FULL-NEXT: ret <2 x float> [[TMP2]] +// +// BASIC-LABEL: define dso_local <2 x float> @f1( +// BASIC-SAME: <2 x float> noundef [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x float> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// BASIC-NEXT: entry: +// BASIC-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// BASIC-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// BASIC-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// BASIC-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// BASIC-NEXT: store <2 x float> [[C_COERCE]], ptr [[C]], align 4 +// BASIC-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// BASIC-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// BASIC-NEXT: [[B_IMAGP:%.*]] = getelementptr 
inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// BASIC-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// BASIC-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// BASIC-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// BASIC-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// BASIC-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// BASIC-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80 +// BASIC-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80 +// BASIC-NEXT: [[TMP0:%.*]] = fmul x86_fp80 [[B_REAL]], [[CONV]] +// BASIC-NEXT: [[TMP1:%.*]] = fmul x86_fp80 [[B_IMAG]], [[CONV1]] +// BASIC-NEXT: [[TMP2:%.*]] = fadd x86_fp80 [[TMP0]], [[TMP1]] +// BASIC-NEXT: [[TMP3:%.*]] = fmul x86_fp80 [[CONV]], [[CONV]] +// BASIC-NEXT: [[TMP4:%.*]] = fmul x86_fp80 [[CONV1]], [[CONV1]] +// BASIC-NEXT: [[TMP5:%.*]] = fadd x86_fp80 [[TMP3]], [[TMP4]] +// BASIC-NEXT: [[TMP6:%.*]] = fmul x86_fp80 [[B_IMAG]], [[CONV]] +// BASIC-NEXT: [[TMP7:%.*]] = fmul x86_fp80 [[B_REAL]], [[CONV1]] +// BASIC-NEXT: [[TMP8:%.*]] = fsub x86_fp80 [[TMP6]], [[TMP7]] +// BASIC-NEXT: [[TMP9:%.*]] = fdiv x86_fp80 [[TMP2]], [[TMP5]] +// BASIC-NEXT: [[TMP10:%.*]] = fdiv x86_fp80 [[TMP8]], [[TMP5]] +// BASIC-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP9]] to float +// BASIC-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP10]] to float +// BASIC-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// BASIC-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// BASIC-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// BASIC-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// BASIC-NEXT: [[TMP11:%.*]] = fmul float [[CONV2]], [[A_REAL]] +// BASIC-NEXT: [[TMP12:%.*]] = fmul float [[CONV3]], [[A_IMAG]] +// BASIC-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]] +// 
BASIC-NEXT: [[TMP14:%.*]] = fmul float [[A_REAL]], [[A_REAL]] +// BASIC-NEXT: [[TMP15:%.*]] = fmul float [[A_IMAG]], [[A_IMAG]] +// BASIC-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]] +// BASIC-NEXT: [[TMP17:%.*]] = fmul float [[CONV3]], [[A_REAL]] +// BASIC-NEXT: [[TMP18:%.*]] = fmul float [[CONV2]], [[A_IMAG]] +// BASIC-NEXT: [[TMP19:%.*]] = fsub float [[TMP17]], [[TMP18]] +// BASIC-NEXT: [[TMP20:%.*]] = fdiv float [[TMP13]], [[TMP16]] +// BASIC-NEXT: [[TMP21:%.*]] = fdiv float [[TMP19]], [[TMP16]] +// BASIC-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC-NEXT: store float [[TMP20]], ptr [[RETVAL_REALP]], align 4 +// BASIC-NEXT: store float [[TMP21]], ptr [[RETVAL_IMAGP]], align 4 +// BASIC-NEXT: [[TMP22:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// BASIC-NEXT: ret <2 x float> [[TMP22]] +// +// IMPRVD-LABEL: define dso_local <2 x float> @f1( +// IMPRVD-SAME: <2 x float> noundef [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x float> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// IMPRVD-NEXT: entry: +// IMPRVD-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// IMPRVD-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// IMPRVD-NEXT: store <2 x float> [[C_COERCE]], ptr [[C]], align 4 +// IMPRVD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// IMPRVD-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// IMPRVD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// IMPRVD-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// IMPRVD-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, 
ptr [[C]], i32 0, i32 0 +// IMPRVD-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// IMPRVD-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// IMPRVD-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// IMPRVD-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80 +// IMPRVD-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80 +// IMPRVD-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) +// IMPRVD-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) +// IMPRVD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt x86_fp80 [[TMP0]], [[TMP1]] +// IMPRVD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD-NEXT: [[TMP2:%.*]] = fdiv x86_fp80 [[CONV1]], [[CONV]] +// IMPRVD-NEXT: [[TMP3:%.*]] = fmul x86_fp80 [[TMP2]], [[CONV1]] +// IMPRVD-NEXT: [[TMP4:%.*]] = fadd x86_fp80 [[CONV]], [[TMP3]] +// IMPRVD-NEXT: [[TMP5:%.*]] = fmul x86_fp80 [[B_IMAG]], [[TMP2]] +// IMPRVD-NEXT: [[TMP6:%.*]] = fadd x86_fp80 [[B_REAL]], [[TMP5]] +// IMPRVD-NEXT: [[TMP7:%.*]] = fdiv x86_fp80 [[TMP6]], [[TMP4]] +// IMPRVD-NEXT: [[TMP8:%.*]] = fmul x86_fp80 [[B_REAL]], [[TMP2]] +// IMPRVD-NEXT: [[TMP9:%.*]] = fsub x86_fp80 [[B_IMAG]], [[TMP8]] +// IMPRVD-NEXT: [[TMP10:%.*]] = fdiv x86_fp80 [[TMP9]], [[TMP4]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD: abs_rhsr_less_than_abs_rhsi: +// IMPRVD-NEXT: [[TMP11:%.*]] = fdiv x86_fp80 [[CONV]], [[CONV1]] +// IMPRVD-NEXT: [[TMP12:%.*]] = fmul x86_fp80 [[TMP11]], [[CONV]] +// IMPRVD-NEXT: [[TMP13:%.*]] = fadd x86_fp80 [[CONV1]], [[TMP12]] +// IMPRVD-NEXT: [[TMP14:%.*]] = fmul x86_fp80 [[B_REAL]], [[TMP11]] +// IMPRVD-NEXT: [[TMP15:%.*]] = fadd x86_fp80 [[TMP14]], [[B_IMAG]] +// IMPRVD-NEXT: [[TMP16:%.*]] = fdiv x86_fp80 [[TMP15]], [[TMP13]] +// IMPRVD-NEXT: [[TMP17:%.*]] = fmul x86_fp80 [[B_IMAG]], [[TMP11]] +// IMPRVD-NEXT: 
[[TMP18:%.*]] = fsub x86_fp80 [[TMP17]], [[B_REAL]] +// IMPRVD-NEXT: [[TMP19:%.*]] = fdiv x86_fp80 [[TMP18]], [[TMP13]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD: complex_div: +// IMPRVD-NEXT: [[TMP20:%.*]] = phi x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[TMP21:%.*]] = phi x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP20]] to float +// IMPRVD-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP21]] to float +// IMPRVD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// IMPRVD-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// IMPRVD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// IMPRVD-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// IMPRVD-NEXT: [[TMP22:%.*]] = call float @llvm.fabs.f32(float [[A_REAL]]) +// IMPRVD-NEXT: [[TMP23:%.*]] = call float @llvm.fabs.f32(float [[A_IMAG]]) +// IMPRVD-NEXT: [[ABS_CMP4:%.*]] = fcmp ugt float [[TMP22]], [[TMP23]] +// IMPRVD-NEXT: br i1 [[ABS_CMP4]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI5:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI6:%.*]] +// IMPRVD: abs_rhsr_greater_or_equal_abs_rhsi5: +// IMPRVD-NEXT: [[TMP24:%.*]] = fdiv float [[A_IMAG]], [[A_REAL]] +// IMPRVD-NEXT: [[TMP25:%.*]] = fmul float [[TMP24]], [[A_IMAG]] +// IMPRVD-NEXT: [[TMP26:%.*]] = fadd float [[A_REAL]], [[TMP25]] +// IMPRVD-NEXT: [[TMP27:%.*]] = fmul float [[CONV3]], [[TMP24]] +// IMPRVD-NEXT: [[TMP28:%.*]] = fadd float [[CONV2]], [[TMP27]] +// IMPRVD-NEXT: [[TMP29:%.*]] = fdiv float [[TMP28]], [[TMP26]] +// IMPRVD-NEXT: [[TMP30:%.*]] = fmul float [[CONV2]], [[TMP24]] +// IMPRVD-NEXT: [[TMP31:%.*]] = fsub float [[CONV3]], [[TMP30]] +// IMPRVD-NEXT: [[TMP32:%.*]] = fdiv float [[TMP31]], [[TMP26]] +// IMPRVD-NEXT: br label 
[[COMPLEX_DIV7:%.*]] +// IMPRVD: abs_rhsr_less_than_abs_rhsi6: +// IMPRVD-NEXT: [[TMP33:%.*]] = fdiv float [[A_REAL]], [[A_IMAG]] +// IMPRVD-NEXT: [[TMP34:%.*]] = fmul float [[TMP33]], [[A_REAL]] +// IMPRVD-NEXT: [[TMP35:%.*]] = fadd float [[A_IMAG]], [[TMP34]] +// IMPRVD-NEXT: [[TMP36:%.*]] = fmul float [[CONV2]], [[TMP33]] +// IMPRVD-NEXT: [[TMP37:%.*]] = fadd float [[TMP36]], [[CONV3]] +// IMPRVD-NEXT: [[TMP38:%.*]] = fdiv float [[TMP37]], [[TMP35]] +// IMPRVD-NEXT: [[TMP39:%.*]] = fmul float [[CONV3]], [[TMP33]] +// IMPRVD-NEXT: [[TMP40:%.*]] = fsub float [[TMP39]], [[CONV2]] +// IMPRVD-NEXT: [[TMP41:%.*]] = fdiv float [[TMP40]], [[TMP35]] +// IMPRVD-NEXT: br label [[COMPLEX_DIV7]] +// IMPRVD: complex_div7: +// IMPRVD-NEXT: [[TMP42:%.*]] = phi float [ [[TMP29]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI5]] ], [ [[TMP38]], [[ABS_RHSR_LESS_THAN_ABS_RHSI6]] ] +// IMPRVD-NEXT: [[TMP43:%.*]] = phi float [ [[TMP32]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI5]] ], [ [[TMP41]], [[ABS_RHSR_LESS_THAN_ABS_RHSI6]] ] +// IMPRVD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD-NEXT: store float [[TMP42]], ptr [[RETVAL_REALP]], align 4 +// IMPRVD-NEXT: store float [[TMP43]], ptr [[RETVAL_IMAGP]], align 4 +// IMPRVD-NEXT: [[TMP44:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// IMPRVD-NEXT: ret <2 x float> [[TMP44]] +// +// PRMTD-LABEL: define dso_local <2 x float> @f1( +// PRMTD-SAME: <2 x float> noundef [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x float> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// PRMTD-NEXT: entry: +// PRMTD-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// PRMTD-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// PRMTD-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// PRMTD-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// 
PRMTD-NEXT: store <2 x float> [[C_COERCE]], ptr [[C]], align 4 +// PRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// PRMTD-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// PRMTD-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// PRMTD-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// PRMTD-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80 +// PRMTD-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80 +// PRMTD-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) +// PRMTD-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) +// PRMTD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt x86_fp80 [[TMP0]], [[TMP1]] +// PRMTD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// PRMTD: abs_rhsr_greater_or_equal_abs_rhsi: +// PRMTD-NEXT: [[TMP2:%.*]] = fdiv x86_fp80 [[CONV1]], [[CONV]] +// PRMTD-NEXT: [[TMP3:%.*]] = fmul x86_fp80 [[TMP2]], [[CONV1]] +// PRMTD-NEXT: [[TMP4:%.*]] = fadd x86_fp80 [[CONV]], [[TMP3]] +// PRMTD-NEXT: [[TMP5:%.*]] = fmul x86_fp80 [[B_IMAG]], [[TMP2]] +// PRMTD-NEXT: [[TMP6:%.*]] = fadd x86_fp80 [[B_REAL]], [[TMP5]] +// PRMTD-NEXT: [[TMP7:%.*]] = fdiv x86_fp80 [[TMP6]], [[TMP4]] +// PRMTD-NEXT: [[TMP8:%.*]] = fmul x86_fp80 [[B_REAL]], [[TMP2]] +// PRMTD-NEXT: [[TMP9:%.*]] = fsub x86_fp80 [[B_IMAG]], [[TMP8]] +// PRMTD-NEXT: [[TMP10:%.*]] = fdiv x86_fp80 [[TMP9]], [[TMP4]] +// PRMTD-NEXT: br label [[COMPLEX_DIV:%.*]] +// PRMTD: abs_rhsr_less_than_abs_rhsi: +// PRMTD-NEXT: [[TMP11:%.*]] = 
fdiv x86_fp80 [[CONV]], [[CONV1]] +// PRMTD-NEXT: [[TMP12:%.*]] = fmul x86_fp80 [[TMP11]], [[CONV]] +// PRMTD-NEXT: [[TMP13:%.*]] = fadd x86_fp80 [[CONV1]], [[TMP12]] +// PRMTD-NEXT: [[TMP14:%.*]] = fmul x86_fp80 [[B_REAL]], [[TMP11]] +// PRMTD-NEXT: [[TMP15:%.*]] = fadd x86_fp80 [[TMP14]], [[B_IMAG]] +// PRMTD-NEXT: [[TMP16:%.*]] = fdiv x86_fp80 [[TMP15]], [[TMP13]] +// PRMTD-NEXT: [[TMP17:%.*]] = fmul x86_fp80 [[B_IMAG]], [[TMP11]] +// PRMTD-NEXT: [[TMP18:%.*]] = fsub x86_fp80 [[TMP17]], [[B_REAL]] +// PRMTD-NEXT: [[TMP19:%.*]] = fdiv x86_fp80 [[TMP18]], [[TMP13]] +// PRMTD-NEXT: br label [[COMPLEX_DIV]] +// PRMTD: complex_div: +// PRMTD-NEXT: [[TMP20:%.*]] = phi x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD-NEXT: [[TMP21:%.*]] = phi x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP20]] to float +// PRMTD-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP21]] to float +// PRMTD-NEXT: [[EXT:%.*]] = fpext float [[CONV2]] to double +// PRMTD-NEXT: [[EXT4:%.*]] = fpext float [[CONV3]] to double +// PRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// PRMTD-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// PRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// PRMTD-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// PRMTD-NEXT: [[EXT5:%.*]] = fpext float [[A_REAL]] to double +// PRMTD-NEXT: [[EXT6:%.*]] = fpext float [[A_IMAG]] to double +// PRMTD-NEXT: [[TMP22:%.*]] = fmul double [[EXT]], [[EXT5]] +// PRMTD-NEXT: [[TMP23:%.*]] = fmul double [[EXT4]], [[EXT6]] +// PRMTD-NEXT: [[TMP24:%.*]] = fadd double [[TMP22]], [[TMP23]] +// PRMTD-NEXT: [[TMP25:%.*]] = fmul double [[EXT5]], [[EXT5]] +// PRMTD-NEXT: [[TMP26:%.*]] = fmul double [[EXT6]], [[EXT6]] +// PRMTD-NEXT: 
[[TMP27:%.*]] = fadd double [[TMP25]], [[TMP26]] +// PRMTD-NEXT: [[TMP28:%.*]] = fmul double [[EXT4]], [[EXT5]] +// PRMTD-NEXT: [[TMP29:%.*]] = fmul double [[EXT]], [[EXT6]] +// PRMTD-NEXT: [[TMP30:%.*]] = fsub double [[TMP28]], [[TMP29]] +// PRMTD-NEXT: [[TMP31:%.*]] = fdiv double [[TMP24]], [[TMP27]] +// PRMTD-NEXT: [[TMP32:%.*]] = fdiv double [[TMP30]], [[TMP27]] +// PRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc double [[TMP31]] to float +// PRMTD-NEXT: [[UNPROMOTION7:%.*]] = fptrunc double [[TMP32]] to float +// PRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 4 +// PRMTD-NEXT: store float [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 4 +// PRMTD-NEXT: [[TMP33:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// PRMTD-NEXT: ret <2 x float> [[TMP33]] +// +// X86WINPRMTD-LABEL: define dso_local i64 @f1( +// X86WINPRMTD-SAME: i64 noundef [[A_COERCE:%.*]], ptr noundef [[B:%.*]], i64 noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// X86WINPRMTD-NEXT: entry: +// X86WINPRMTD-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD-NEXT: [[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD-NEXT: store i64 [[A_COERCE]], ptr [[A]], align 4 +// X86WINPRMTD-NEXT: store i64 [[C_COERCE]], ptr [[C]], align 4 +// X86WINPRMTD-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD-NEXT: 
[[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// X86WINPRMTD-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// X86WINPRMTD-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to double +// X86WINPRMTD-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to double +// X86WINPRMTD-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) +// X86WINPRMTD-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) +// X86WINPRMTD-NEXT: [[ABS_CMP:%.*]] = fcmp ugt double [[TMP0]], [[TMP1]] +// X86WINPRMTD-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// X86WINPRMTD: abs_rhsr_greater_or_equal_abs_rhsi: +// X86WINPRMTD-NEXT: [[TMP2:%.*]] = fdiv double [[CONV1]], [[CONV]] +// X86WINPRMTD-NEXT: [[TMP3:%.*]] = fmul double [[TMP2]], [[CONV1]] +// X86WINPRMTD-NEXT: [[TMP4:%.*]] = fadd double [[CONV]], [[TMP3]] +// X86WINPRMTD-NEXT: [[TMP5:%.*]] = fmul double [[B_IMAG]], [[TMP2]] +// X86WINPRMTD-NEXT: [[TMP6:%.*]] = fadd double [[B_REAL]], [[TMP5]] +// X86WINPRMTD-NEXT: [[TMP7:%.*]] = fdiv double [[TMP6]], [[TMP4]] +// X86WINPRMTD-NEXT: [[TMP8:%.*]] = fmul double [[B_REAL]], [[TMP2]] +// X86WINPRMTD-NEXT: [[TMP9:%.*]] = fsub double [[B_IMAG]], [[TMP8]] +// X86WINPRMTD-NEXT: [[TMP10:%.*]] = fdiv double [[TMP9]], [[TMP4]] +// X86WINPRMTD-NEXT: br label [[COMPLEX_DIV:%.*]] +// X86WINPRMTD: abs_rhsr_less_than_abs_rhsi: +// X86WINPRMTD-NEXT: [[TMP11:%.*]] = fdiv double [[CONV]], [[CONV1]] +// X86WINPRMTD-NEXT: [[TMP12:%.*]] = fmul double [[TMP11]], [[CONV]] +// X86WINPRMTD-NEXT: [[TMP13:%.*]] = fadd double [[CONV1]], [[TMP12]] +// X86WINPRMTD-NEXT: [[TMP14:%.*]] = fmul double [[B_REAL]], [[TMP11]] +// 
X86WINPRMTD-NEXT: [[TMP15:%.*]] = fadd double [[TMP14]], [[B_IMAG]] +// X86WINPRMTD-NEXT: [[TMP16:%.*]] = fdiv double [[TMP15]], [[TMP13]] +// X86WINPRMTD-NEXT: [[TMP17:%.*]] = fmul double [[B_IMAG]], [[TMP11]] +// X86WINPRMTD-NEXT: [[TMP18:%.*]] = fsub double [[TMP17]], [[B_REAL]] +// X86WINPRMTD-NEXT: [[TMP19:%.*]] = fdiv double [[TMP18]], [[TMP13]] +// X86WINPRMTD-NEXT: br label [[COMPLEX_DIV]] +// X86WINPRMTD: complex_div: +// X86WINPRMTD-NEXT: [[TMP20:%.*]] = phi double [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD-NEXT: [[TMP21:%.*]] = phi double [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD-NEXT: [[CONV2:%.*]] = fptrunc double [[TMP20]] to float +// X86WINPRMTD-NEXT: [[CONV3:%.*]] = fptrunc double [[TMP21]] to float +// X86WINPRMTD-NEXT: [[EXT:%.*]] = fpext float [[CONV2]] to double +// X86WINPRMTD-NEXT: [[EXT4:%.*]] = fpext float [[CONV3]] to double +// X86WINPRMTD-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// X86WINPRMTD-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// X86WINPRMTD-NEXT: [[EXT5:%.*]] = fpext float [[A_REAL]] to double +// X86WINPRMTD-NEXT: [[EXT6:%.*]] = fpext float [[A_IMAG]] to double +// X86WINPRMTD-NEXT: [[TMP22:%.*]] = fmul double [[EXT]], [[EXT5]] +// X86WINPRMTD-NEXT: [[TMP23:%.*]] = fmul double [[EXT4]], [[EXT6]] +// X86WINPRMTD-NEXT: [[TMP24:%.*]] = fadd double [[TMP22]], [[TMP23]] +// X86WINPRMTD-NEXT: [[TMP25:%.*]] = fmul double [[EXT5]], [[EXT5]] +// X86WINPRMTD-NEXT: [[TMP26:%.*]] = fmul double [[EXT6]], [[EXT6]] +// X86WINPRMTD-NEXT: [[TMP27:%.*]] = fadd double [[TMP25]], [[TMP26]] +// X86WINPRMTD-NEXT: [[TMP28:%.*]] = fmul double [[EXT4]], 
[[EXT5]] +// X86WINPRMTD-NEXT: [[TMP29:%.*]] = fmul double [[EXT]], [[EXT6]] +// X86WINPRMTD-NEXT: [[TMP30:%.*]] = fsub double [[TMP28]], [[TMP29]] +// X86WINPRMTD-NEXT: [[TMP31:%.*]] = fdiv double [[TMP24]], [[TMP27]] +// X86WINPRMTD-NEXT: [[TMP32:%.*]] = fdiv double [[TMP30]], [[TMP27]] +// X86WINPRMTD-NEXT: [[UNPROMOTION:%.*]] = fptrunc double [[TMP31]] to float +// X86WINPRMTD-NEXT: [[UNPROMOTION7:%.*]] = fptrunc double [[TMP32]] to float +// X86WINPRMTD-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// X86WINPRMTD-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// X86WINPRMTD-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 4 +// X86WINPRMTD-NEXT: store float [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 4 +// X86WINPRMTD-NEXT: [[TMP33:%.*]] = load i64, ptr [[RETVAL]], align 4 +// X86WINPRMTD-NEXT: ret i64 [[TMP33]] +// +// AVRFP32-LABEL: define dso_local { float, float } @f1( +// AVRFP32-SAME: float noundef [[A_COERCE0:%.*]], float noundef [[A_COERCE1:%.*]], float noundef [[B_COERCE0:%.*]], float noundef [[B_COERCE1:%.*]], float noundef [[C_COERCE0:%.*]], float noundef [[C_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP32-NEXT: entry: +// AVRFP32-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[A:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[B:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[C:%.*]] = alloca { float, float }, align 1 +// AVRFP32-NEXT: [[TMP0:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[A_COERCE0]], ptr [[TMP0]], align 1 +// AVRFP32-NEXT: [[TMP1:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[A_COERCE1]], ptr [[TMP1]], align 1 +// AVRFP32-NEXT: [[TMP2:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: store float 
[[B_COERCE0]], ptr [[TMP2]], align 1 +// AVRFP32-NEXT: [[TMP3:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[B_COERCE1]], ptr [[TMP3]], align 1 +// AVRFP32-NEXT: [[TMP4:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// AVRFP32-NEXT: store float [[C_COERCE0]], ptr [[TMP4]], align 1 +// AVRFP32-NEXT: [[TMP5:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[C_COERCE1]], ptr [[TMP5]], align 1 +// AVRFP32-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// AVRFP32-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 1 +// AVRFP32-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// AVRFP32-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 1 +// AVRFP32-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// AVRFP32-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 1 +// AVRFP32-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// AVRFP32-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP6:%.*]] = call addrspace(1) float @llvm.fabs.f32(float [[C_REAL]]) +// AVRFP32-NEXT: [[TMP7:%.*]] = call addrspace(1) float @llvm.fabs.f32(float [[C_IMAG]]) +// AVRFP32-NEXT: [[ABS_CMP:%.*]] = fcmp ugt float [[TMP6]], [[TMP7]] +// AVRFP32-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// AVRFP32: abs_rhsr_greater_or_equal_abs_rhsi: +// AVRFP32-NEXT: [[TMP8:%.*]] = fdiv float [[C_IMAG]], [[C_REAL]] +// AVRFP32-NEXT: [[TMP9:%.*]] = fmul float [[TMP8]], [[C_IMAG]] +// AVRFP32-NEXT: [[TMP10:%.*]] = fadd float [[C_REAL]], [[TMP9]] +// AVRFP32-NEXT: [[TMP11:%.*]] = fmul float [[B_IMAG]], [[TMP8]] +// AVRFP32-NEXT: [[TMP12:%.*]] = fadd float [[B_REAL]], [[TMP11]] +// 
AVRFP32-NEXT: [[TMP13:%.*]] = fdiv float [[TMP12]], [[TMP10]] +// AVRFP32-NEXT: [[TMP14:%.*]] = fmul float [[B_REAL]], [[TMP8]] +// AVRFP32-NEXT: [[TMP15:%.*]] = fsub float [[B_IMAG]], [[TMP14]] +// AVRFP32-NEXT: [[TMP16:%.*]] = fdiv float [[TMP15]], [[TMP10]] +// AVRFP32-NEXT: br label [[COMPLEX_DIV:%.*]] +// AVRFP32: abs_rhsr_less_than_abs_rhsi: +// AVRFP32-NEXT: [[TMP17:%.*]] = fdiv float [[C_REAL]], [[C_IMAG]] +// AVRFP32-NEXT: [[TMP18:%.*]] = fmul float [[TMP17]], [[C_REAL]] +// AVRFP32-NEXT: [[TMP19:%.*]] = fadd float [[C_IMAG]], [[TMP18]] +// AVRFP32-NEXT: [[TMP20:%.*]] = fmul float [[B_REAL]], [[TMP17]] +// AVRFP32-NEXT: [[TMP21:%.*]] = fadd float [[TMP20]], [[B_IMAG]] +// AVRFP32-NEXT: [[TMP22:%.*]] = fdiv float [[TMP21]], [[TMP19]] +// AVRFP32-NEXT: [[TMP23:%.*]] = fmul float [[B_IMAG]], [[TMP17]] +// AVRFP32-NEXT: [[TMP24:%.*]] = fsub float [[TMP23]], [[B_REAL]] +// AVRFP32-NEXT: [[TMP25:%.*]] = fdiv float [[TMP24]], [[TMP19]] +// AVRFP32-NEXT: br label [[COMPLEX_DIV]] +// AVRFP32: complex_div: +// AVRFP32-NEXT: [[TMP26:%.*]] = phi float [ [[TMP13]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP22]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP32-NEXT: [[TMP27:%.*]] = phi float [ [[TMP16]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP25]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP32-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP32-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 1 +// AVRFP32-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP32-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP28:%.*]] = call addrspace(1) float @llvm.fabs.f32(float [[A_REAL]]) +// AVRFP32-NEXT: [[TMP29:%.*]] = call addrspace(1) float @llvm.fabs.f32(float [[A_IMAG]]) +// AVRFP32-NEXT: [[ABS_CMP1:%.*]] = fcmp ugt float [[TMP28]], [[TMP29]] +// AVRFP32-NEXT: br i1 [[ABS_CMP1]], label 
[[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI2:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI3:%.*]] +// AVRFP32: abs_rhsr_greater_or_equal_abs_rhsi2: +// AVRFP32-NEXT: [[TMP30:%.*]] = fdiv float [[A_IMAG]], [[A_REAL]] +// AVRFP32-NEXT: [[TMP31:%.*]] = fmul float [[TMP30]], [[A_IMAG]] +// AVRFP32-NEXT: [[TMP32:%.*]] = fadd float [[A_REAL]], [[TMP31]] +// AVRFP32-NEXT: [[TMP33:%.*]] = fmul float [[TMP27]], [[TMP30]] +// AVRFP32-NEXT: [[TMP34:%.*]] = fadd float [[TMP26]], [[TMP33]] +// AVRFP32-NEXT: [[TMP35:%.*]] = fdiv float [[TMP34]], [[TMP32]] +// AVRFP32-NEXT: [[TMP36:%.*]] = fmul float [[TMP26]], [[TMP30]] +// AVRFP32-NEXT: [[TMP37:%.*]] = fsub float [[TMP27]], [[TMP36]] +// AVRFP32-NEXT: [[TMP38:%.*]] = fdiv float [[TMP37]], [[TMP32]] +// AVRFP32-NEXT: br label [[COMPLEX_DIV4:%.*]] +// AVRFP32: abs_rhsr_less_than_abs_rhsi3: +// AVRFP32-NEXT: [[TMP39:%.*]] = fdiv float [[A_REAL]], [[A_IMAG]] +// AVRFP32-NEXT: [[TMP40:%.*]] = fmul float [[TMP39]], [[A_REAL]] +// AVRFP32-NEXT: [[TMP41:%.*]] = fadd float [[A_IMAG]], [[TMP40]] +// AVRFP32-NEXT: [[TMP42:%.*]] = fmul float [[TMP26]], [[TMP39]] +// AVRFP32-NEXT: [[TMP43:%.*]] = fadd float [[TMP42]], [[TMP27]] +// AVRFP32-NEXT: [[TMP44:%.*]] = fdiv float [[TMP43]], [[TMP41]] +// AVRFP32-NEXT: [[TMP45:%.*]] = fmul float [[TMP27]], [[TMP39]] +// AVRFP32-NEXT: [[TMP46:%.*]] = fsub float [[TMP45]], [[TMP26]] +// AVRFP32-NEXT: [[TMP47:%.*]] = fdiv float [[TMP46]], [[TMP41]] +// AVRFP32-NEXT: br label [[COMPLEX_DIV4]] +// AVRFP32: complex_div4: +// AVRFP32-NEXT: [[TMP48:%.*]] = phi float [ [[TMP35]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI2]] ], [ [[TMP44]], [[ABS_RHSR_LESS_THAN_ABS_RHSI3]] ] +// AVRFP32-NEXT: [[TMP49:%.*]] = phi float [ [[TMP38]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI2]] ], [ [[TMP47]], [[ABS_RHSR_LESS_THAN_ABS_RHSI3]] ] +// AVRFP32-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// AVRFP32-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr 
[[RETVAL]], i32 0, i32 1 +// AVRFP32-NEXT: store float [[TMP48]], ptr [[RETVAL_REALP]], align 1 +// AVRFP32-NEXT: store float [[TMP49]], ptr [[RETVAL_IMAGP]], align 1 +// AVRFP32-NEXT: [[TMP50:%.*]] = load { float, float }, ptr [[RETVAL]], align 1 +// AVRFP32-NEXT: ret { float, float } [[TMP50]] +// +// AVRFP64-LABEL: define dso_local { float, float } @f1( +// AVRFP64-SAME: float noundef [[A_COERCE0:%.*]], float noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]], float noundef [[C_COERCE0:%.*]], float noundef [[C_COERCE1:%.*]]) addrspace(1) #[[ATTR0]] { +// AVRFP64-NEXT: entry: +// AVRFP64-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 1 +// AVRFP64-NEXT: [[A:%.*]] = alloca { float, float }, align 1 +// AVRFP64-NEXT: [[B:%.*]] = alloca { double, double }, align 1 +// AVRFP64-NEXT: [[C:%.*]] = alloca { float, float }, align 1 +// AVRFP64-NEXT: [[TMP0:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: store float [[A_COERCE0]], ptr [[TMP0]], align 1 +// AVRFP64-NEXT: [[TMP1:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: store float [[A_COERCE1]], ptr [[TMP1]], align 1 +// AVRFP64-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// AVRFP64-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 1 +// AVRFP64-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 1 +// AVRFP64-NEXT: [[TMP4:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// AVRFP64-NEXT: store float [[C_COERCE0]], ptr [[TMP4]], align 1 +// AVRFP64-NEXT: [[TMP5:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// AVRFP64-NEXT: store float [[C_COERCE1]], ptr [[TMP5]], align 1 +// AVRFP64-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// 
AVRFP64-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 1 +// AVRFP64-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// AVRFP64-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 1 +// AVRFP64-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// AVRFP64-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 1 +// AVRFP64-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// AVRFP64-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 1 +// AVRFP64-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to double +// AVRFP64-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to double +// AVRFP64-NEXT: [[TMP6:%.*]] = call addrspace(1) double @llvm.fabs.f64(double [[CONV]]) +// AVRFP64-NEXT: [[TMP7:%.*]] = call addrspace(1) double @llvm.fabs.f64(double [[CONV1]]) +// AVRFP64-NEXT: [[ABS_CMP:%.*]] = fcmp ugt double [[TMP6]], [[TMP7]] +// AVRFP64-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// AVRFP64: abs_rhsr_greater_or_equal_abs_rhsi: +// AVRFP64-NEXT: [[TMP8:%.*]] = fdiv double [[CONV1]], [[CONV]] +// AVRFP64-NEXT: [[TMP9:%.*]] = fmul double [[TMP8]], [[CONV1]] +// AVRFP64-NEXT: [[TMP10:%.*]] = fadd double [[CONV]], [[TMP9]] +// AVRFP64-NEXT: [[TMP11:%.*]] = fmul double [[B_IMAG]], [[TMP8]] +// AVRFP64-NEXT: [[TMP12:%.*]] = fadd double [[B_REAL]], [[TMP11]] +// AVRFP64-NEXT: [[TMP13:%.*]] = fdiv double [[TMP12]], [[TMP10]] +// AVRFP64-NEXT: [[TMP14:%.*]] = fmul double [[B_REAL]], [[TMP8]] +// AVRFP64-NEXT: [[TMP15:%.*]] = fsub double [[B_IMAG]], [[TMP14]] +// AVRFP64-NEXT: [[TMP16:%.*]] = fdiv double [[TMP15]], [[TMP10]] +// AVRFP64-NEXT: br label [[COMPLEX_DIV:%.*]] +// AVRFP64: abs_rhsr_less_than_abs_rhsi: +// AVRFP64-NEXT: [[TMP17:%.*]] = fdiv double [[CONV]], [[CONV1]] +// AVRFP64-NEXT: [[TMP18:%.*]] = fmul double [[TMP17]], [[CONV]] +// AVRFP64-NEXT: 
[[TMP19:%.*]] = fadd double [[CONV1]], [[TMP18]] +// AVRFP64-NEXT: [[TMP20:%.*]] = fmul double [[B_REAL]], [[TMP17]] +// AVRFP64-NEXT: [[TMP21:%.*]] = fadd double [[TMP20]], [[B_IMAG]] +// AVRFP64-NEXT: [[TMP22:%.*]] = fdiv double [[TMP21]], [[TMP19]] +// AVRFP64-NEXT: [[TMP23:%.*]] = fmul double [[B_IMAG]], [[TMP17]] +// AVRFP64-NEXT: [[TMP24:%.*]] = fsub double [[TMP23]], [[B_REAL]] +// AVRFP64-NEXT: [[TMP25:%.*]] = fdiv double [[TMP24]], [[TMP19]] +// AVRFP64-NEXT: br label [[COMPLEX_DIV]] +// AVRFP64: complex_div: +// AVRFP64-NEXT: [[TMP26:%.*]] = phi double [ [[TMP13]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP22]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP64-NEXT: [[TMP27:%.*]] = phi double [ [[TMP16]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP25]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// AVRFP64-NEXT: [[CONV2:%.*]] = fptrunc double [[TMP26]] to float +// AVRFP64-NEXT: [[CONV3:%.*]] = fptrunc double [[TMP27]] to float +// AVRFP64-NEXT: [[EXT:%.*]] = fpext float [[CONV2]] to double +// AVRFP64-NEXT: [[EXT4:%.*]] = fpext float [[CONV3]] to double +// AVRFP64-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// AVRFP64-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 1 +// AVRFP64-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// AVRFP64-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 1 +// AVRFP64-NEXT: [[EXT5:%.*]] = fpext float [[A_REAL]] to double +// AVRFP64-NEXT: [[EXT6:%.*]] = fpext float [[A_IMAG]] to double +// AVRFP64-NEXT: [[TMP28:%.*]] = fmul double [[EXT]], [[EXT5]] +// AVRFP64-NEXT: [[TMP29:%.*]] = fmul double [[EXT4]], [[EXT6]] +// AVRFP64-NEXT: [[TMP30:%.*]] = fadd double [[TMP28]], [[TMP29]] +// AVRFP64-NEXT: [[TMP31:%.*]] = fmul double [[EXT5]], [[EXT5]] +// AVRFP64-NEXT: [[TMP32:%.*]] = fmul double [[EXT6]], [[EXT6]] +// AVRFP64-NEXT: [[TMP33:%.*]] = fadd double [[TMP31]], [[TMP32]] +// AVRFP64-NEXT: [[TMP34:%.*]] = fmul double 
[[EXT4]], [[EXT5]] +// AVRFP64-NEXT: [[TMP35:%.*]] = fmul double [[EXT]], [[EXT6]] +// AVRFP64-NEXT: [[TMP36:%.*]] = fsub double [[TMP34]], [[TMP35]] +// AVRFP64-NEXT: [[TMP37:%.*]] = fdiv double [[TMP30]], [[TMP33]] +// AVRFP64-NEXT: [[TMP38:%.*]] = fdiv double [[TMP36]], [[TMP33]] +// AVRFP64-NEXT: [[UNPROMOTION:%.*]] = fptrunc double [[TMP37]] to float +// AVRFP64-NEXT: [[UNPROMOTION7:%.*]] = fptrunc double [[TMP38]] to float +// AVRFP64-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// AVRFP64-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// AVRFP64-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 1 +// AVRFP64-NEXT: store float [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 1 +// AVRFP64-NEXT: [[TMP39:%.*]] = load { float, float }, ptr [[RETVAL]], align 1 +// AVRFP64-NEXT: ret { float, float } [[TMP39]] +// +// BASIC_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @f1( +// BASIC_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x float> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR0]] { +// BASIC_FAST-NEXT: entry: +// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// BASIC_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// BASIC_FAST-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// BASIC_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// BASIC_FAST-NEXT: store <2 x float> [[C_COERCE]], ptr [[C]], align 4 +// BASIC_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// BASIC_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// 
BASIC_FAST-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// BASIC_FAST-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// BASIC_FAST-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80 +// BASIC_FAST-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80 +// BASIC_FAST-NEXT: [[TMP0:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[CONV]] +// BASIC_FAST-NEXT: [[TMP1:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[CONV1]] +// BASIC_FAST-NEXT: [[TMP2:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP0]], [[TMP1]] +// BASIC_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV]], [[CONV]] +// BASIC_FAST-NEXT: [[TMP4:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV1]], [[CONV1]] +// BASIC_FAST-NEXT: [[TMP5:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP3]], [[TMP4]] +// BASIC_FAST-NEXT: [[TMP6:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[CONV]] +// BASIC_FAST-NEXT: [[TMP7:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[CONV1]] +// BASIC_FAST-NEXT: [[TMP8:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP6]], [[TMP7]] +// BASIC_FAST-NEXT: [[TMP9:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP2]], [[TMP5]] +// BASIC_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP8]], [[TMP5]] +// BASIC_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP9]] to float +// BASIC_FAST-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP10]] to float +// BASIC_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// BASIC_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { 
float, float }, ptr [[A]], i32 0, i32 1 +// BASIC_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// BASIC_FAST-NEXT: [[TMP11:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[CONV2]], [[A_REAL]] +// BASIC_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[CONV3]], [[A_IMAG]] +// BASIC_FAST-NEXT: [[TMP13:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP11]], [[TMP12]] +// BASIC_FAST-NEXT: [[TMP14:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[A_REAL]] +// BASIC_FAST-NEXT: [[TMP15:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[A_IMAG]] +// BASIC_FAST-NEXT: [[TMP16:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP14]], [[TMP15]] +// BASIC_FAST-NEXT: [[TMP17:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[CONV3]], [[A_REAL]] +// BASIC_FAST-NEXT: [[TMP18:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[CONV2]], [[A_IMAG]] +// BASIC_FAST-NEXT: [[TMP19:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[TMP17]], [[TMP18]] +// BASIC_FAST-NEXT: [[TMP20:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP13]], [[TMP16]] +// BASIC_FAST-NEXT: [[TMP21:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP19]], [[TMP16]] +// BASIC_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// BASIC_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// BASIC_FAST-NEXT: store float [[TMP20]], ptr [[RETVAL_REALP]], align 4 +// BASIC_FAST-NEXT: store float [[TMP21]], ptr [[RETVAL_IMAGP]], align 4 +// BASIC_FAST-NEXT: [[TMP22:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// BASIC_FAST-NEXT: ret <2 x float> [[TMP22]] +// +// FULL_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @f1( +// FULL_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x float> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) 
#[[ATTR0]] { +// FULL_FAST-NEXT: entry: +// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// FULL_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// FULL_FAST-NEXT: store <2 x float> [[C_COERCE]], ptr [[C]], align 4 +// FULL_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// FULL_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// FULL_FAST-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// FULL_FAST-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// FULL_FAST-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// FULL_FAST-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// FULL_FAST-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80 +// FULL_FAST-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80 +// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR2]] +// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0 +// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 +// FULL_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to float +// FULL_FAST-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP1]] to float +// FULL_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, 
i32 0 +// FULL_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// FULL_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// FULL_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// FULL_FAST-NEXT: [[CALL4:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) <2 x float> @__divsc3(float noundef nofpclass(nan inf) [[CONV2]], float noundef nofpclass(nan inf) [[CONV3]], float noundef nofpclass(nan inf) [[A_REAL]], float noundef nofpclass(nan inf) [[A_IMAG]]) #[[ATTR2]] +// FULL_FAST-NEXT: store <2 x float> [[CALL4]], ptr [[COERCE]], align 4 +// FULL_FAST-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// FULL_FAST-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// FULL_FAST-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// FULL_FAST-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// FULL_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// FULL_FAST-NEXT: store float [[COERCE_REAL]], ptr [[RETVAL_REALP]], align 4 +// FULL_FAST-NEXT: store float [[COERCE_IMAG]], ptr [[RETVAL_IMAGP]], align 4 +// FULL_FAST-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// FULL_FAST-NEXT: ret <2 x float> [[TMP2]] +// +// IMPRVD_FAST-LABEL: define dso_local nofpclass(nan inf) <2 x float> @f1( +// IMPRVD_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x float> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR0]] { +// IMPRVD_FAST-NEXT: entry: +// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// IMPRVD_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// 
IMPRVD_FAST-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// IMPRVD_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// IMPRVD_FAST-NEXT: store <2 x float> [[C_COERCE]], ptr [[C]], align 4 +// IMPRVD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// IMPRVD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// IMPRVD_FAST-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// IMPRVD_FAST-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// IMPRVD_FAST-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80 +// IMPRVD_FAST-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80 +// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) +// IMPRVD_FAST-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) +// IMPRVD_FAST-NEXT: [[ABS_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt x86_fp80 [[TMP0]], [[TMP1]] +// IMPRVD_FAST-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// IMPRVD_FAST: abs_rhsr_greater_or_equal_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP2:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV1]], [[CONV]] +// IMPRVD_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP2]], [[CONV1]] +// IMPRVD_FAST-NEXT: [[TMP4:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV]], [[TMP3]] +// IMPRVD_FAST-NEXT: [[TMP5:%.*]] = fmul reassoc nnan ninf nsz arcp 
afn x86_fp80 [[B_IMAG]], [[TMP2]] +// IMPRVD_FAST-NEXT: [[TMP6:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP5]] +// IMPRVD_FAST-NEXT: [[TMP7:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP6]], [[TMP4]] +// IMPRVD_FAST-NEXT: [[TMP8:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP2]] +// IMPRVD_FAST-NEXT: [[TMP9:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP8]] +// IMPRVD_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP9]], [[TMP4]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV:%.*]] +// IMPRVD_FAST: abs_rhsr_less_than_abs_rhsi: +// IMPRVD_FAST-NEXT: [[TMP11:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV]], [[CONV1]] +// IMPRVD_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP11]], [[CONV]] +// IMPRVD_FAST-NEXT: [[TMP13:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV1]], [[TMP12]] +// IMPRVD_FAST-NEXT: [[TMP14:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP11]] +// IMPRVD_FAST-NEXT: [[TMP15:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP14]], [[B_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP16:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP15]], [[TMP13]] +// IMPRVD_FAST-NEXT: [[TMP17:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP11]] +// IMPRVD_FAST-NEXT: [[TMP18:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP17]], [[B_REAL]] +// IMPRVD_FAST-NEXT: [[TMP19:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP18]], [[TMP13]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV]] +// IMPRVD_FAST: complex_div: +// IMPRVD_FAST-NEXT: [[TMP20:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[TMP21:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], 
[[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// IMPRVD_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP20]] to float +// IMPRVD_FAST-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP21]] to float +// IMPRVD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// IMPRVD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// IMPRVD_FAST-NEXT: [[TMP22:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float [[A_REAL]]) +// IMPRVD_FAST-NEXT: [[TMP23:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float [[A_IMAG]]) +// IMPRVD_FAST-NEXT: [[ABS_CMP4:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt float [[TMP22]], [[TMP23]] +// IMPRVD_FAST-NEXT: br i1 [[ABS_CMP4]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI5:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI6:%.*]] +// IMPRVD_FAST: abs_rhsr_greater_or_equal_abs_rhsi5: +// IMPRVD_FAST-NEXT: [[TMP24:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[A_REAL]] +// IMPRVD_FAST-NEXT: [[TMP25:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[TMP24]], [[A_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP26:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[TMP25]] +// IMPRVD_FAST-NEXT: [[TMP27:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[CONV3]], [[TMP24]] +// IMPRVD_FAST-NEXT: [[TMP28:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[CONV2]], [[TMP27]] +// IMPRVD_FAST-NEXT: [[TMP29:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP28]], [[TMP26]] +// IMPRVD_FAST-NEXT: [[TMP30:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[CONV2]], [[TMP24]] +// IMPRVD_FAST-NEXT: [[TMP31:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[CONV3]], [[TMP30]] +// IMPRVD_FAST-NEXT: [[TMP32:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP31]], [[TMP26]] +// 
IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV7:%.*]] +// IMPRVD_FAST: abs_rhsr_less_than_abs_rhsi6: +// IMPRVD_FAST-NEXT: [[TMP33:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[A_REAL]], [[A_IMAG]] +// IMPRVD_FAST-NEXT: [[TMP34:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[TMP33]], [[A_REAL]] +// IMPRVD_FAST-NEXT: [[TMP35:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[A_IMAG]], [[TMP34]] +// IMPRVD_FAST-NEXT: [[TMP36:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[CONV2]], [[TMP33]] +// IMPRVD_FAST-NEXT: [[TMP37:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[TMP36]], [[CONV3]] +// IMPRVD_FAST-NEXT: [[TMP38:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP37]], [[TMP35]] +// IMPRVD_FAST-NEXT: [[TMP39:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[CONV3]], [[TMP33]] +// IMPRVD_FAST-NEXT: [[TMP40:%.*]] = fsub reassoc nnan ninf nsz arcp afn float [[TMP39]], [[CONV2]] +// IMPRVD_FAST-NEXT: [[TMP41:%.*]] = fdiv reassoc nnan ninf nsz arcp afn float [[TMP40]], [[TMP35]] +// IMPRVD_FAST-NEXT: br label [[COMPLEX_DIV7]] +// IMPRVD_FAST: complex_div7: +// IMPRVD_FAST-NEXT: [[TMP42:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[TMP29]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI5]] ], [ [[TMP38]], [[ABS_RHSR_LESS_THAN_ABS_RHSI6]] ] +// IMPRVD_FAST-NEXT: [[TMP43:%.*]] = phi reassoc nnan ninf nsz arcp afn float [ [[TMP32]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI5]] ], [ [[TMP41]], [[ABS_RHSR_LESS_THAN_ABS_RHSI6]] ] +// IMPRVD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// IMPRVD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// IMPRVD_FAST-NEXT: store float [[TMP42]], ptr [[RETVAL_REALP]], align 4 +// IMPRVD_FAST-NEXT: store float [[TMP43]], ptr [[RETVAL_IMAGP]], align 4 +// IMPRVD_FAST-NEXT: [[TMP44:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// IMPRVD_FAST-NEXT: ret <2 x float> [[TMP44]] +// +// PRMTD_FAST-LABEL: define 
dso_local nofpclass(nan inf) <2 x float> @f1( +// PRMTD_FAST-SAME: <2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x float> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) #[[ATTR0]] { +// PRMTD_FAST-NEXT: entry: +// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// PRMTD_FAST-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// PRMTD_FAST-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// PRMTD_FAST-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// PRMTD_FAST-NEXT: store <2 x float> [[C_COERCE]], ptr [[C]], align 4 +// PRMTD_FAST-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD_FAST-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// PRMTD_FAST-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// PRMTD_FAST-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80 +// PRMTD_FAST-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80 +// PRMTD_FAST-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) +// PRMTD_FAST-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) +// PRMTD_FAST-NEXT: [[ABS_CMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn ugt x86_fp80 [[TMP0]], [[TMP1]] +// PRMTD_FAST-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// PRMTD_FAST: 
abs_rhsr_greater_or_equal_abs_rhsi: +// PRMTD_FAST-NEXT: [[TMP2:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV1]], [[CONV]] +// PRMTD_FAST-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP2]], [[CONV1]] +// PRMTD_FAST-NEXT: [[TMP4:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV]], [[TMP3]] +// PRMTD_FAST-NEXT: [[TMP5:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP2]] +// PRMTD_FAST-NEXT: [[TMP6:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP5]] +// PRMTD_FAST-NEXT: [[TMP7:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP6]], [[TMP4]] +// PRMTD_FAST-NEXT: [[TMP8:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP2]] +// PRMTD_FAST-NEXT: [[TMP9:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP8]] +// PRMTD_FAST-NEXT: [[TMP10:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP9]], [[TMP4]] +// PRMTD_FAST-NEXT: br label [[COMPLEX_DIV:%.*]] +// PRMTD_FAST: abs_rhsr_less_than_abs_rhsi: +// PRMTD_FAST-NEXT: [[TMP11:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV]], [[CONV1]] +// PRMTD_FAST-NEXT: [[TMP12:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP11]], [[CONV]] +// PRMTD_FAST-NEXT: [[TMP13:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[CONV1]], [[TMP12]] +// PRMTD_FAST-NEXT: [[TMP14:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_REAL]], [[TMP11]] +// PRMTD_FAST-NEXT: [[TMP15:%.*]] = fadd reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP14]], [[B_IMAG]] +// PRMTD_FAST-NEXT: [[TMP16:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP15]], [[TMP13]] +// PRMTD_FAST-NEXT: [[TMP17:%.*]] = fmul reassoc nnan ninf nsz arcp afn x86_fp80 [[B_IMAG]], [[TMP11]] +// PRMTD_FAST-NEXT: [[TMP18:%.*]] = fsub reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP17]], [[B_REAL]] +// PRMTD_FAST-NEXT: [[TMP19:%.*]] = fdiv reassoc nnan ninf nsz arcp afn x86_fp80 [[TMP18]], [[TMP13]] +// PRMTD_FAST-NEXT: br 
label [[COMPLEX_DIV]] +// PRMTD_FAST: complex_div: +// PRMTD_FAST-NEXT: [[TMP20:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD_FAST-NEXT: [[TMP21:%.*]] = phi reassoc nnan ninf nsz arcp afn x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP20]] to float +// PRMTD_FAST-NEXT: [[CONV3:%.*]] = fptrunc x86_fp80 [[TMP21]] to float +// PRMTD_FAST-NEXT: [[EXT:%.*]] = fpext float [[CONV2]] to double +// PRMTD_FAST-NEXT: [[EXT4:%.*]] = fpext float [[CONV3]] to double +// PRMTD_FAST-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// PRMTD_FAST-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// PRMTD_FAST-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// PRMTD_FAST-NEXT: [[EXT5:%.*]] = fpext float [[A_REAL]] to double +// PRMTD_FAST-NEXT: [[EXT6:%.*]] = fpext float [[A_IMAG]] to double +// PRMTD_FAST-NEXT: [[TMP22:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT]], [[EXT5]] +// PRMTD_FAST-NEXT: [[TMP23:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT4]], [[EXT6]] +// PRMTD_FAST-NEXT: [[TMP24:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[TMP22]], [[TMP23]] +// PRMTD_FAST-NEXT: [[TMP25:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT5]], [[EXT5]] +// PRMTD_FAST-NEXT: [[TMP26:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT6]], [[EXT6]] +// PRMTD_FAST-NEXT: [[TMP27:%.*]] = fadd reassoc nnan ninf nsz arcp afn double [[TMP25]], [[TMP26]] +// PRMTD_FAST-NEXT: [[TMP28:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT4]], [[EXT5]] +// PRMTD_FAST-NEXT: [[TMP29:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[EXT]], [[EXT6]] +// 
PRMTD_FAST-NEXT: [[TMP30:%.*]] = fsub reassoc nnan ninf nsz arcp afn double [[TMP28]], [[TMP29]] +// PRMTD_FAST-NEXT: [[TMP31:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[TMP24]], [[TMP27]] +// PRMTD_FAST-NEXT: [[TMP32:%.*]] = fdiv reassoc nnan ninf nsz arcp afn double [[TMP30]], [[TMP27]] +// PRMTD_FAST-NEXT: [[UNPROMOTION:%.*]] = fptrunc double [[TMP31]] to float +// PRMTD_FAST-NEXT: [[UNPROMOTION7:%.*]] = fptrunc double [[TMP32]] to float +// PRMTD_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_FAST-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_FAST-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 4 +// PRMTD_FAST-NEXT: store float [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 4 +// PRMTD_FAST-NEXT: [[TMP33:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// PRMTD_FAST-NEXT: ret <2 x float> [[TMP33]] +// +_Complex float f1(_Complex float a, _Complex long double b, _Complex float c) { + return (_Complex float)(b / c) / a; +} From dec045b50ecba655b6271d2e739d0597f41cb207 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Wed, 20 Mar 2024 04:32:18 -0700 Subject: [PATCH 18/18] Changed the type of FPHasBeenPromoted to bool. 
--- clang/lib/CodeGen/CGExprComplex.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 7dddb9fcfd06f..27ddaacc28f52 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -51,12 +51,12 @@ class ComplexExprEmitter CGBuilderTy &Builder; bool IgnoreReal; bool IgnoreImag; - LangOptions::ComplexRangeKind FPHasBeenPromoted; + bool FPHasBeenPromoted; public: ComplexExprEmitter(CodeGenFunction &cgf, bool ir = false, bool ii = false) : CGF(cgf), Builder(CGF.Builder), IgnoreReal(ir), IgnoreImag(ii), - FPHasBeenPromoted(LangOptions::ComplexRangeKind::CX_None) {} + FPHasBeenPromoted(false) {} //===--------------------------------------------------------------------===// // Utilities @@ -321,7 +321,7 @@ class ComplexExprEmitter llvm::APFloat::semanticsMaxExponent(HigherElementTypeSemantics)) { return CGF.getContext().getComplexType(HigherElementType); } else { - FPHasBeenPromoted = LangOptions::ComplexRangeKind::CX_Improved; + FPHasBeenPromoted = true; DiagnosticsEngine &Diags = CGF.CGM.getDiags(); Diags.Report(diag::warn_next_larger_fp_type_same_size_than_fp); return CGF.getContext().getComplexType(ElementType); @@ -1037,7 +1037,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { QualType ComplexElementTy = Op.Ty->castAs()->getElementType(); if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || (Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted && - FPHasBeenPromoted == LangOptions::CX_Improved)) + FPHasBeenPromoted)) return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi); else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted)