From 94b8fbe4219ebc3642ebe4d03dc02720be3bb568 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Thu, 24 Apr 2025 10:38:22 +0300 Subject: [PATCH] [Xtensa] Implement Xtensa Floating Point Option. --- .../Disassembler/XtensaDisassembler.cpp | 38 ++ .../MCTargetDesc/XtensaMCCodeEmitter.cpp | 5 + llvm/lib/Target/Xtensa/XtensaCallingConv.td | 1 + llvm/lib/Target/Xtensa/XtensaFeatures.td | 5 + llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 168 +++++- llvm/lib/Target/Xtensa/XtensaISelLowering.h | 23 + llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp | 14 +- llvm/lib/Target/Xtensa/XtensaInstrInfo.td | 291 +++++++++ llvm/lib/Target/Xtensa/XtensaOperators.td | 17 + llvm/lib/Target/Xtensa/XtensaRegisterInfo.td | 44 ++ llvm/lib/Target/Xtensa/XtensaSubtarget.h | 1 + llvm/test/CodeGen/Xtensa/float-arith.ll | 569 ++++++++++++++++++ llvm/test/MC/Xtensa/float-err.s | 37 ++ llvm/test/MC/Xtensa/float.s | 177 ++++++ 14 files changed, 1384 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/Xtensa/float-arith.ll create mode 100644 llvm/test/MC/Xtensa/float-err.s create mode 100644 llvm/test/MC/Xtensa/float.s diff --git a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp index 6b355e6363b22..237aed3cc71ea 100644 --- a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp +++ b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp @@ -113,6 +113,44 @@ static DecodeStatus DecodeMR23RegisterClass(MCInst &Inst, uint64_t RegNo, return MCDisassembler::Success; } +static const unsigned FPRDecoderTable[] = { + Xtensa::F0, Xtensa::F1, Xtensa::F2, Xtensa::F3, Xtensa::F4, Xtensa::F5, + Xtensa::F6, Xtensa::F7, Xtensa::F8, Xtensa::F9, Xtensa::F10, Xtensa::F11, + Xtensa::F12, Xtensa::F13, Xtensa::F14, Xtensa::F15}; + +static DecodeStatus DecodeFPRRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo >= std::size(FPRDecoderTable)) + return MCDisassembler::Fail; + + 
unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static const unsigned URDecoderTable[] = {Xtensa::FCR, 232, Xtensa::FSR, 233}; + +static DecodeStatus DecodeURRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + // URDecoderTable holds (register, encoding) pairs and is searched without + // consulting the subtarget, so Decoder is deliberately left unused here. + + if (RegNo > 255) + return MCDisassembler::Fail; + + for (unsigned i = 0; i < std::size(URDecoderTable); i += 2) { + if (URDecoderTable[i + 1] == RegNo) { + unsigned Reg = URDecoderTable[i]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; + } + } + + return MCDisassembler::Fail; +} + const MCPhysReg SRDecoderTable[] = { Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17, Xtensa::M0, 32, Xtensa::M1, 33, Xtensa::M2, 34, diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp index 8231a8a9a44d4..03b3ed0c121be 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp @@ -307,6 +307,11 @@ XtensaMCCodeEmitter::getMemRegEncoding(const MCInst &MI, unsigned OpNo, case Xtensa::L32I: case Xtensa::S32I_N: case Xtensa::L32I_N: + case Xtensa::SSI: + case Xtensa::SSIP: + case Xtensa::LSI: + case Xtensa::LSIP: + if (Res & 0x3) { report_fatal_error("Unexpected operand value!"); } diff --git a/llvm/lib/Target/Xtensa/XtensaCallingConv.td b/llvm/lib/Target/Xtensa/XtensaCallingConv.td index 2c48f8f86cafb..96528ed283a30 100644 --- a/llvm/lib/Target/Xtensa/XtensaCallingConv.td +++ b/llvm/lib/Target/Xtensa/XtensaCallingConv.td @@ -15,6 +15,7 @@ def RetCC_Xtensa : CallingConv<[ // First two return values go in a2, a3, a4, a5 CCIfType<[i32], CCAssignToReg<[A2, A3, A4, A5]>>, + CCIfType<[f32], CCAssignToReg<[A2, A3, A4, A5]>>, CCIfType<[i64], CCAssignToRegWithShadow<[A2, A4], [A3, A5]>> 
]>; diff --git a/llvm/lib/Target/Xtensa/XtensaFeatures.td b/llvm/lib/Target/Xtensa/XtensaFeatures.td index 2a47214946401..31084d63879d1 100644 --- a/llvm/lib/Target/Xtensa/XtensaFeatures.td +++ b/llvm/lib/Target/Xtensa/XtensaFeatures.td @@ -8,6 +8,11 @@ def FeatureDensity : SubtargetFeature<"density", "HasDensity", "true", def HasDensity : Predicate<"Subtarget->hasDensity()">, AssemblerPredicate<(all_of FeatureDensity)>; +def FeatureSingleFloat : SubtargetFeature<"fp", "HasSingleFloat", "true", + "Enable Xtensa Single FP instructions">; +def HasSingleFloat : Predicate<"Subtarget->hasSingleFloat()">, + AssemblerPredicate<(all_of FeatureSingleFloat)>; + def FeatureWindowed : SubtargetFeature<"windowed", "HasWindowed", "true", "Enable Xtensa Windowed Register option">; def HasWindowed : Predicate<"Subtarget->hasWindowed()">, diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index b17840aad9b4d..83f58607c9d37 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -58,6 +58,10 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, // Set up the register classes. 
addRegisterClass(MVT::i32, &Xtensa::ARRegClass); + if (Subtarget.hasSingleFloat()) { + addRegisterClass(MVT::f32, &Xtensa::FPRRegClass); + } + if (Subtarget.hasBoolean()) { addRegisterClass(MVT::v1i1, &Xtensa::BRRegClass); } @@ -71,6 +75,8 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::Constant, MVT::i32, Custom); setOperationAction(ISD::Constant, MVT::i64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Custom); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); setBooleanContents(ZeroOrOneBooleanContent); @@ -108,7 +114,10 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); setCondCodeAction(ISD::SETGT, MVT::i32, Expand); setCondCodeAction(ISD::SETLE, MVT::i32, Expand); @@ -175,6 +184,103 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VACOPY, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); + // Handle floating-point types. + for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // We can use FI for FRINT. 
+ // setOperationAction(ISD::FRINT, VT, Legal); + if (VT.getSizeInBits() == 32 && Subtarget.hasSingleFloat()) { + setOperationAction(ISD::FABS, VT, Legal); + setOperationAction(ISD::FADD, VT, Legal); + setOperationAction(ISD::FSUB, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); + setOperationAction(ISD::FMUL, VT, Legal); + setOperationAction(ISD::FNEG, VT, Legal); + } else { + setOperationAction(ISD::FABS, VT, Expand); + setOperationAction(ISD::FADD, VT, Expand); + setOperationAction(ISD::FSUB, VT, Expand); + setOperationAction(ISD::FMA, VT, Expand); + setOperationAction(ISD::FMUL, VT, Expand); + setOperationAction(ISD::FNEG, VT, Expand); + } + + // TODO: once implemented in InstrInfo uncomment + setOperationAction(ISD::FSQRT, VT, Expand); + + // No special instructions for these. + setOperationAction(ISD::FCBRT, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FEXP, VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); + setOperationAction(ISD::FLOG, VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FMAXIMUM, VT, Expand); + setOperationAction(ISD::FMINIMUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FROUND, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); + setOperationAction(ISD::FSQRT, VT, Expand); + setOperationAction(ISD::FTRUNC, VT, Expand); + setOperationAction(ISD::LLRINT, VT, Expand); + setOperationAction(ISD::LLROUND, 
VT, Expand); + setOperationAction(ISD::LRINT, VT, Expand); + setOperationAction(ISD::LROUND, VT, Expand); + } + } + + // Handle floating-point types. + if (Subtarget.hasSingleFloat()) { + setOperationAction(ISD::BITCAST, MVT::i32, Legal); + setOperationAction(ISD::BITCAST, MVT::f32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); + + setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); + setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); + setCondCodeAction(ISD::SETONE, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); + } else { + setOperationAction(ISD::BITCAST, MVT::i32, Expand); + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand); + } + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::SETCC, MVT::f64, Expand); + setOperationAction(ISD::BITCAST, MVT::i64, Expand); + setOperationAction(ISD::BITCAST, MVT::f64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); + + // Needed so that we don't try to implement f128 constant loads using + // a load-and-extend of a f80 constant (in cases where the constant + // would fit in an f80). + for (MVT VT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); + + // Floating-point truncation and stores need to be done separately. 
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // Compute derived properties from the register classes computeRegisterProperties(STI.getRegisterInfo()); } @@ -185,6 +291,11 @@ bool XtensaTargetLowering::isOffsetFoldingLegal( return false; } +bool XtensaTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const { + return false; +} + //===----------------------------------------------------------------------===// // Inline asm support //===----------------------------------------------------------------------===// @@ -335,6 +446,16 @@ static bool CC_Xtensa_Custom(unsigned ValNo, MVT ValVT, MVT LocVT, return false; } +/// Return the register type for a given MVT +MVT XtensaTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const { + if (VT.isFloatingPoint()) + return MVT::i32; + + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); +} + CCAssignFn *XtensaTargetLowering::CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const { return CC_Xtensa_Custom; @@ -815,6 +936,21 @@ SDValue XtensaTargetLowering::LowerImmediate(SDValue Op, return Op; } +SDValue XtensaTargetLowering::LowerImmediateFP(SDValue Op, + SelectionDAG &DAG) const { + const ConstantFPSDNode *CN = cast<ConstantFPSDNode>(Op); + SDLoc DL(CN); + APFloat apval = CN->getValueAPF(); + int64_t value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat()); + if (Op.getValueType() == MVT::f32) { + Type *Ty = Type::getInt32Ty(*DAG.getContext()); + Constant *CV = ConstantInt::get(Ty, value); + SDValue CP = DAG.getConstantPool(CV, MVT::i32); + return DAG.getNode(ISD::BITCAST, DL, MVT::f32, CP); + } + return Op; +} + SDValue XtensaTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); @@ -1248,6 +1384,8 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op, return LowerBR_JT(Op, DAG); case ISD::Constant: return LowerImmediate(Op, DAG); + case ISD::ConstantFP: + return 
LowerImmediateFP(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::GlobalAddress: @@ -1311,6 +1449,26 @@ const char *XtensaTargetLowering::getTargetNodeName(unsigned Opcode) const { return "XtensaISD::SRCL"; case XtensaISD::SRCR: return "XtensaISD::SRCR"; + case XtensaISD::CMPUO: + return "XtensaISD::CMPUO"; + case XtensaISD::CMPUEQ: + return "XtensaISD::CMPUEQ"; + case XtensaISD::CMPULE: + return "XtensaISD::CMPULE"; + case XtensaISD::CMPULT: + return "XtensaISD::CMPULT"; + case XtensaISD::CMPOEQ: + return "XtensaISD::CMPOEQ"; + case XtensaISD::CMPOLE: + return "XtensaISD::CMPOLE"; + case XtensaISD::CMPOLT: + return "XtensaISD::CMPOLT"; + case XtensaISD::MADD: + return "XtensaISD::MADD"; + case XtensaISD::MSUB: + return "XtensaISD::MSUB"; + case XtensaISD::MOVS: + return "XtensaISD::MOVS"; } return nullptr; } @@ -1395,11 +1553,19 @@ MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter( case Xtensa::S16I: case Xtensa::S32I: case Xtensa::S32I_N: + case Xtensa::SSI: + case Xtensa::SSIP: + case Xtensa::SSX: + case Xtensa::SSXP: case Xtensa::L8UI: case Xtensa::L16SI: case Xtensa::L16UI: case Xtensa::L32I: - case Xtensa::L32I_N: { + case Xtensa::L32I_N: + case Xtensa::LSI: + case Xtensa::LSIP: + case Xtensa::LSX: + case Xtensa::LSXP: { // Insert memory wait instruction "memw" before volatile load/store as it is // implemented in gcc. If memoperands is empty then assume that it aslo // maybe volatile load/store and insert "memw". 
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index c7d4f41b1f08e..850ec6f3b023a 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -56,6 +56,21 @@ enum { SRCL, // Shift Right Combined SRCR, + + // Floating point unordered compare conditions + CMPUEQ, + CMPULE, + CMPULT, + CMPUO, + // Floating point compare conditions + CMPOEQ, + CMPOLE, + CMPOLT, + // FP multiply-add/sub + MADD, + MSUB, + // FP move + MOVS, }; } @@ -70,6 +85,9 @@ class XtensaTargetLowering : public TargetLowering { return LHSTy.getSizeInBits() <= 32 ? MVT::i32 : MVT::i64; } + MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, + EVT VT) const override; + EVT getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const override { if (!VT.isVector()) @@ -81,6 +99,9 @@ class XtensaTargetLowering : public TargetLowering { const char *getTargetNodeName(unsigned Opcode) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; @@ -133,6 +154,8 @@ class XtensaTargetLowering : public TargetLowering { SDValue LowerImmediate(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerImmediateFP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp index 005532b864c41..5b1bc73267157 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp @@ -151,11 +151,15 @@ void XtensaInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, unsigned &LoadOpcode, unsigned &StoreOpcode, int64_t offset) const { - assert((RC == &Xtensa::ARRegClass) && - "Unsupported 
regclass to load or store"); - - LoadOpcode = Xtensa::L32I; - StoreOpcode = Xtensa::S32I; + if (RC == &Xtensa::ARRegClass) { + LoadOpcode = Xtensa::L32I; + StoreOpcode = Xtensa::S32I; + } else if (RC == &Xtensa::FPRRegClass) { + LoadOpcode = Xtensa::LSI; + StoreOpcode = Xtensa::SSI; + } else { + llvm_unreachable("Unsupported regclass to load or store"); + } } void XtensaInstrInfo::loadImmediate(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td index 0bd3ba81340ff..758277f7f601f 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td @@ -986,6 +986,297 @@ let Predicates = [HasDiv32] in { def REMU : ArithLogic_RRR<0x0E, 0x02, "remu", urem>; } +//===----------------------------------------------------------------------===// +// Floating-Point Instructions +//===----------------------------------------------------------------------===// + +class FPArith_RRR<bits<4> oper2, bits<4> oper1, string instrAsm, + SDPatternOperator opNode, bit isComm = 0> + : RRR_Inst<0x00, oper1, oper2, (outs FPR:$r), (ins FPR:$s, FPR:$t), + instrAsm#"\t$r, $s, $t", + [(set FPR:$r, (opNode FPR:$s, FPR:$t))]> { + let isCommutable = isComm; + let isReMaterializable = 0; + let Predicates = [HasSingleFloat]; +} + +def ADD_S : FPArith_RRR<0x00, 0x0A, "add.s", fadd, 1>; +def SUB_S : FPArith_RRR<0x01, 0x0A, "sub.s", fsub>; +def MUL_S : FPArith_RRR<0x02, 0x0A, "mul.s", fmul, 1>; + +// FP load instructions +let mayLoad = 1, usesCustomInserter = 1, Predicates = [HasSingleFloat] in { + def LSI : RRI8_Inst<0x03, (outs FPR:$t), (ins mem32:$addr), + "lsi\t$t, $addr", []> { + bits<12> addr; + + let r = 0x00; + let imm8{7-0} = addr{11-4}; + let s{3-0} = addr{3-0}; + } + + def LSIP : RRI8_Inst<0x03, (outs FPR:$t), (ins mem32:$addr), + "lsip\t$t, $addr", []> { + bits<12> addr; + + let r = 0x08; + let imm8{7-0} = addr{11-4}; + let s{3-0} = addr{3-0}; + } + + def LSX : RRR_Inst<0x00, 0x08, 0x00, (outs), 
(ins FPR:$r, AR:$s, AR:$t), + "lsx\t$r, $s, $t", []>; + + def LSXP : RRR_Inst<0x00, 0x08, 0x01, (outs), (ins FPR:$r, AR:$s, AR:$t), + "lsxp\t$r, $s, $t", []>; +} + +def : Pat<(f32 (load addr_ish4:$addr)), (f32 (LSI mem32:$addr))>; + +// FP store instructions +let mayStore = 1, usesCustomInserter = 1, Predicates = [HasSingleFloat] in { + def SSI : RRI8_Inst<0x03, (outs), (ins FPR:$t, mem32:$addr), + "ssi\t$t, $addr", []> { + bits<12> addr; + + let r = 0x04; + let imm8{7-0} = addr{11-4}; + let s{3-0} = addr{3-0}; + } + + def SSIP : RRI8_Inst<0x03, (outs), (ins FPR:$t, mem32:$addr), + "ssip\t$t, $addr", []> { + bits<12> addr; + + let r = 0x0C; + let imm8{7-0} = addr{11-4}; + let s{3-0} = addr{3-0}; + } + + def SSX: RRR_Inst<0x00, 0x08, 0x04, (outs), (ins FPR:$r, AR:$s, AR:$t), + "ssx\t$r, $s, $t", []>; + + def SSXP: RRR_Inst<0x00, 0x08, 0x05, (outs), (ins FPR:$r, AR:$s, AR:$t), + "ssxp\t$r, $s, $t", []>; +} + +def : Pat<(store FPR:$t, addr_ish4:$addr), (SSI FPR:$t, mem32:$addr)>; + +// FP compare instructions +let isCompare = 1, Predicates = [HasSingleFloat] in { + class FCompare<bits<4> oper2, bits<4> oper1, string instrAsm, + SDPatternOperator opNode, bit isComm = 0> + : RRR_Inst<0x00, oper1, oper2, (outs BR:$r), (ins FPR:$s, FPR:$t), + instrAsm#"\t$r, $s, $t", + [(set BR:$r, (opNode FPR:$s, FPR:$t))]> { + let isCommutable = isComm; + let isReMaterializable = 0; + let Predicates = [HasSingleFloat]; + } +} + +def OEQ_S : FCompare<0x02, 0x0b, "oeq.s", Xtensa_cmpoeq, 1>; +def OLT_S : FCompare<0x04, 0x0b, "olt.s", Xtensa_cmpolt, 0>; +def OLE_S : FCompare<0x06, 0x0b, "ole.s", Xtensa_cmpole, 0>; + +def UEQ_S : FCompare<0x03, 0x0b, "ueq.s", Xtensa_cmpueq, 1>; +def ULT_S : FCompare<0x05, 0x0b, "ult.s", Xtensa_cmpult, 0>; +def ULE_S : FCompare<0x07, 0x0b, "ule.s", Xtensa_cmpule, 0>; +def UN_S : FCompare<0x01, 0x0b, "un.s", Xtensa_cmpuo, 1>; + +def ABS_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "abs.s\t$r, $s", + [(set FPR:$r, (fabs FPR:$s))]>, 
Requires<[HasSingleFloat]> { + let t = 0x01; +} + +def : Pat<(fabs FPR:$s), (ABS_S $s)>; + +def ADDEXP_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "addexp.s\t$r, $s", []>, Requires<[HasSingleFloat]> { + let t = 0x0E; +} + +def ADDEXPM_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "addexpm.s\t$r, $s", []>, Requires<[HasSingleFloat]> { + let t = 0x0F; +} + +def CEIL_S : RRR_Inst<0x00, 0x0A, 0x0B, (outs AR:$r), (ins FPR:$s, uimm4:$imm), + "ceil.s\t$r, $s, $imm", []>, Requires<[HasSingleFloat]> { + bits<4> imm; + + let t = imm; +} + +def CONST_S : RRR_Inst<0x00, 0x0a, 0x0f, (outs FPR:$r), (ins uimm4:$imm), + "const.s\t$r, $imm", []>, Requires<[HasSingleFloat]> { + bits<4> imm; + + let t = 0x03; + let s = imm{3-0}; +} + +def DIV0_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "div0.s\t$r, $s", []>, Requires<[HasSingleFloat]> { + let t = 0x7; +} + +def DIVN_S : RRR_Inst<0x00, 0x0A, 0x07, (outs FPR:$r), (ins FPR:$s, FPR:$t), + "divn.s\t$r, $s, $t", []>, Requires<[HasSingleFloat]>; + +def FLOAT_S : RRR_Inst<0x00, 0x0A, 0x0c, (outs FPR:$r), (ins AR:$s, uimm4:$imm), + "float.s\t$r, $s, $imm", []>, Requires<[HasSingleFloat]> { + bits<4> imm; + + let t = imm; +} + +def : Pat<(f32 (sint_to_fp AR:$s)), (FLOAT_S AR:$s, 0)>; + +def FLOOR_S : RRR_Inst<0x00, 0x0A, 0x0A, (outs AR:$r), (ins FPR:$s, uimm4:$imm), + "floor.s\t$r, $s, $imm", []>, Requires<[HasSingleFloat]> { + bits<4> imm; + + let t = imm; +} + +def MADDN_S : RRR_Inst<0x00, 0x0A, 0x06, (outs FPR:$r), (ins FPR:$s, FPR:$t), + "maddn.s\t$r, $s, $t", []>, Requires<[HasSingleFloat]> { + let isCommutable = 0; +} + +// FP multiply-add +def MADD_S : RRR_Inst<0x00, 0x0A, 0x04, (outs FPR:$r), (ins FPR:$a, FPR:$s, FPR:$t), + "madd.s\t$r, $s, $t", + [(set FPR:$r, (Xtensa_madd FPR:$a, FPR:$s, FPR:$t))]>, + Requires<[HasSingleFloat]> { + let isCommutable = 0; + let isReMaterializable = 0; + let Constraints = "$r = $a"; +} + +// fmadd: r1 * r2 + r3 +def : Pat<(fma FPR:$r1, FPR:$r2, 
FPR:$r3), + (MADD_S $r3, $r1, $r2)>; + + +def MKDADJ_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "mkdadj.s\t$r, $s", []>, Requires<[HasSingleFloat]> { + let t = 0x0D; +} + +def MKSADJ_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "mksadj.s\t$r, $s", []>, Requires<[HasSingleFloat]> { + let t = 0x0C; +} + +// FP move instructions +def MOV_S : RRR_Inst<0x00, 0x0A, 0x0f, (outs FPR:$r), (ins FPR:$s), + "mov.s\t$r, $s", + [(set FPR:$r, (Xtensa_movs FPR:$s))]>, Requires<[HasSingleFloat]> { + let t = 0x00; +} + +def MOVEQZ_S : RRR_Inst<0x00, 0x0B, 0x08, (outs FPR:$r), (ins FPR:$s, AR:$t), + "moveqz.s\t$r, $s, $t", []>, Requires<[HasSingleFloat]>; + +def MOVF_S : RRR_Inst<0x00, 0x0B, 0x0C, (outs FPR:$r), (ins FPR:$s, BR:$t), + "movf.s\t$r, $s, $t", []>, Requires<[HasBoolean, HasSingleFloat]>; + +def MOVGEZ_S : RRR_Inst<0x00, 0x0B, 0x0B, (outs FPR:$r), (ins FPR:$s, AR:$t), + "movgez.s\t$r, $s, $t", []>, Requires<[HasSingleFloat]>; + +def MOVLTZ_S : RRR_Inst<0x00, 0x0B, 0x0A, (outs FPR:$r), (ins FPR:$s, AR:$t), + "movltz.s\t$r, $s, $t", []>, Requires<[HasSingleFloat]>; + +def MOVNEZ_S : RRR_Inst<0x00, 0x0B, 0x09, (outs FPR:$r), (ins FPR:$s, AR:$t), + "movnez.s\t$r, $s, $t", []>, Requires<[HasSingleFloat]>; + +def MOVT_S : RRR_Inst<0x00, 0x0B, 0x0D, (outs FPR:$r), (ins FPR:$s, BR:$t), + "movt.s\t$r, $s, $t", []>, Requires<[HasBoolean, HasSingleFloat]>; + +// FP multiply-sub +def MSUB_S : RRR_Inst<0x00, 0x0A, 0x05, (outs FPR:$r), (ins FPR:$a, FPR:$s, FPR:$t), + "msub.s\t$r, $s, $t", + [(set FPR:$r, (Xtensa_msub FPR:$a, FPR:$s, FPR:$t))]>, Requires<[HasSingleFloat]> { + let isCommutable = 0; + let isReMaterializable = 0; + let Constraints = "$r = $a"; +} + +def NEXP01_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "nexp01.s\t$r, $s", []>, Requires<[HasSingleFloat]> { + let t = 0x0B; +} + +def NEG_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "neg.s\t$r, $s", + [(set FPR:$r, (fneg FPR:$s))]>, Requires<[HasSingleFloat]> { + let t = 0x06; +} + 
+def RECIP0_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "recip0.s\t$r, $s", []>, Requires<[HasSingleFloat]> { + let t = 0x08; +} + +def RFR : RRR_Inst<0x00, 0x0A, 0x0f, (outs AR:$r), (ins FPR:$s), + "rfr\t$r, $s", + [(set AR:$r, (bitconvert FPR:$s))]>, Requires<[HasSingleFloat]> { + let t = 0x04; +} + +def ROUND_S : RRR_Inst<0x00, 0x0A, 0x08, (outs AR:$r), (ins FPR:$s, uimm4:$imm), + "round.s\t$r, $s, $imm", []>, Requires<[HasSingleFloat]> { + bits<4> imm; + + let t = imm; +} + +def RSQRT0_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "rsqrt0.s\t$r, $s", []>, Requires<[HasSingleFloat]> { + let t = 0x0A; +} + +def SQRT0_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), + "sqrt0.s\t$r, $s", []>, Requires<[HasSingleFloat]> { + let t = 0x09; +} + +def TRUNC_S : RRR_Inst<0x00, 0x0A, 0x09, (outs AR:$r), (ins FPR:$s, uimm4:$imm), + "trunc.s\t$r, $s, $imm", []>, Requires<[HasSingleFloat]> { + bits<4> imm; + + let t = imm; +} + +def : Pat<(i32 (fp_to_sint FPR:$s)), (TRUNC_S FPR:$s, 0)>; + +def UFLOAT_S : RRR_Inst<0x00, 0x0A, 0x0D, (outs FPR:$r), (ins AR:$s, uimm4:$imm), + "ufloat.s\t$r, $s, $imm", []>, Requires<[HasSingleFloat]> { + bits<4> imm; + + let t = imm; +} + +def : Pat<(f32 (uint_to_fp AR:$s)), (UFLOAT_S AR:$s, 0)>; + +def UTRUNC_S : RRR_Inst<0x00, 0x0A, 0x0e, (outs AR:$r), (ins FPR:$s, uimm4:$imm), + "utrunc.s\t$r, $s, $imm", []>, Requires<[HasSingleFloat]> { + bits<4> imm; + + let t = imm; +} + +def : Pat<(i32 (fp_to_uint FPR:$s)), (UTRUNC_S FPR:$s, 0)>; + +def WFR : RRR_Inst<0x00, 0x0A, 0x0f, (outs FPR:$r), (ins AR:$s), + "wfr\t$r, $s", + [(set FPR:$r, (bitconvert AR:$s))]>, Requires<[HasSingleFloat]> { + let t = 0x05; +} + //===----------------------------------------------------------------------===// // DSP Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Xtensa/XtensaOperators.td b/llvm/lib/Target/Xtensa/XtensaOperators.td index 12b81fccec479..fea13c2298d97 100644 --- a/llvm/lib/Target/Xtensa/XtensaOperators.td +++ b/llvm/lib/Target/Xtensa/XtensaOperators.td @@ -25,6 +25,11 @@ 
def SDT_XtensaSelectCC : SDTypeProfile<1, 5, SDTCisSameAs<2, 3>, SDTCisVT<5, i32>]>; +def SDT_XtensaCmp : SDTypeProfile<1, 2, [SDTCisVT<0, v1i1>, SDTCisVT<1, f32>, SDTCisVT<2, f32>]>; +def SDT_XtensaMADD : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, f32>]>; +def SDT_XtensaMOVS : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisVT<0, f32>]>; +def SDT_XtensaSelectCCFP : SDTypeProfile<1, 5, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, SDTCisVT<5, i32>]>; + def SDT_XtensaSRC : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; @@ -70,3 +75,15 @@ def Xtensa_extui: SDNode<"XtensaISD::EXTUI", SDT_XtensaEXTUI>; def Xtensa_movsp: SDNode<"XtensaISD::MOVSP", SDT_XtensaMOVSP, [SDNPHasChain, SDNPSideEffect, SDNPInGlue]>; + +def Xtensa_cmpoeq : SDNode<"XtensaISD::CMPOEQ", SDT_XtensaCmp, [SDNPOutGlue]>; +def Xtensa_cmpolt : SDNode<"XtensaISD::CMPOLT", SDT_XtensaCmp, [SDNPOutGlue]>; +def Xtensa_cmpole : SDNode<"XtensaISD::CMPOLE", SDT_XtensaCmp, [SDNPOutGlue]>; +def Xtensa_cmpueq : SDNode<"XtensaISD::CMPUEQ", SDT_XtensaCmp, [SDNPOutGlue]>; +def Xtensa_cmpult : SDNode<"XtensaISD::CMPULT", SDT_XtensaCmp, [SDNPOutGlue]>; +def Xtensa_cmpule : SDNode<"XtensaISD::CMPULE", SDT_XtensaCmp, [SDNPOutGlue]>; +def Xtensa_cmpuo : SDNode<"XtensaISD::CMPUO", SDT_XtensaCmp, [SDNPOutGlue]>; + +def Xtensa_madd: SDNode<"XtensaISD::MADD", SDT_XtensaMADD, [SDNPInGlue]>; +def Xtensa_msub: SDNode<"XtensaISD::MSUB", SDT_XtensaMADD, [SDNPInGlue]>; +def Xtensa_movs: SDNode<"XtensaISD::MOVS", SDT_XtensaMOVS, [SDNPInGlue]>; diff --git a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td index 2a40431adc7f0..36b2b50ccbe9d 100644 --- a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td +++ b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td @@ -103,6 +103,50 @@ def MR : RegisterClass<"Xtensa", [i32], 32, (add MR01, MR23)>; def SR : RegisterClass<"Xtensa", [i32], 32, (add LBEG, 
LEND, LCOUNT, SAR, BREG, MR, WINDOWBASE, WINDOWSTART)>; +//===----------------------------------------------------------------------===// +// USER registers +//===----------------------------------------------------------------------===// +class URReg<bits<8> num, string n, list<string> alt = []> : XtensaReg<n> { + let HWEncoding{7-0} = num; + let AltNames = alt; +} + +def FCR : URReg<232, "fcr", ["FCR"]>; +def FSR : URReg<233, "fsr", ["FSR"]>; + +def UR : RegisterClass<"Xtensa", [i32], 32, (add FCR, FSR)>; + +//===----------------------------------------------------------------------===// +// Floating-Point registers +//===----------------------------------------------------------------------===// + +// Xtensa Floating-Point regs +class FPReg<bits<4> num, string n> : XtensaReg<n> { + let HWEncoding{3-0} = num; +} + +def F0 : FPReg<0, "f0">, DwarfRegNum<[19]>; +def F1 : FPReg<1, "f1">, DwarfRegNum<[20]>; +def F2 : FPReg<2, "f2">, DwarfRegNum<[21]>; +def F3 : FPReg<3, "f3">, DwarfRegNum<[22]>; +def F4 : FPReg<4, "f4">, DwarfRegNum<[23]>; +def F5 : FPReg<5, "f5">, DwarfRegNum<[24]>; +def F6 : FPReg<6, "f6">, DwarfRegNum<[25]>; +def F7 : FPReg<7, "f7">, DwarfRegNum<[26]>; +def F8 : FPReg<8, "f8">, DwarfRegNum<[27]>; +def F9 : FPReg<9, "f9">, DwarfRegNum<[28]>; +def F10 : FPReg<10, "f10">, DwarfRegNum<[29]>; +def F11 : FPReg<11, "f11">, DwarfRegNum<[30]>; +def F12 : FPReg<12, "f12">, DwarfRegNum<[31]>; +def F13 : FPReg<13, "f13">, DwarfRegNum<[32]>; +def F14 : FPReg<14, "f14">, DwarfRegNum<[33]>; +def F15 : FPReg<15, "f15">, DwarfRegNum<[34]>; + +// Floating-Point register class with allocation order +def FPR : RegisterClass<"Xtensa", [f32], 32, (add + F8, F9, F10, F11, F12, F13, F14, F15, + F7, F6, F5, F4, F3, F2, F1, F0)>; + //===----------------------------------------------------------------------===// // Boolean registers //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Xtensa/XtensaSubtarget.h 
b/llvm/lib/Target/Xtensa/XtensaSubtarget.h index 227ce2134b33b..96ca62c2c33ee 100644 --- a/llvm/lib/Target/Xtensa/XtensaSubtarget.h +++ b/llvm/lib/Target/Xtensa/XtensaSubtarget.h @@ -77,6 +77,7 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo { bool hasMul32() const { return HasMul32; } bool hasMul32High() const { return HasMul32High; } bool hasDiv32() const { return HasDiv32; } + bool hasSingleFloat() const { return HasSingleFloat; } bool isWindowedABI() const { return hasWindowed(); } // Automatically generated by tblgen. diff --git a/llvm/test/CodeGen/Xtensa/float-arith.ll b/llvm/test/CodeGen/Xtensa/float-arith.ll new file mode 100644 index 0000000000000..be12a1fb70d22 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/float-arith.ll @@ -0,0 +1,569 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=xtensa -mattr=+fp -verify-machineinstrs < %s | FileCheck -check-prefix=XTENSA %s + +define float @fadd_s(float %a, float %b) nounwind { +; XTENSA-LABEL: fadd_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: add.s f8, f9, f8 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %1 = fadd float %a, %b + ret float %1 +} + +define float @fsub_s(float %a, float %b) nounwind { +; XTENSA-LABEL: fsub_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: sub.s f8, f9, f8 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %1 = fsub float %a, %b + ret float %1 +} + +define float @fmul_s(float %a, float %b) nounwind { +; XTENSA-LABEL: fmul_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: mul.s f8, f9, f8 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %1 = fmul float %a, %b + ret float %1 +} + +define float @fdiv_s(float %a, float %b) nounwind { +; XTENSA-LABEL: fdiv_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 
4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI3_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = fdiv float %a, %b + ret float %1 +} + +declare float @llvm.sqrt.f32(float) + +define float @fsqrt_s(float %a) nounwind { +; XTENSA-LABEL: fsqrt_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI4_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.sqrt.f32(float %a) + ret float %1 +} + +declare float @llvm.fabs.f32(float) + +define float @fabs_s(float %a, float %b) nounwind { +; XTENSA-LABEL: fabs_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: add.s f8, f9, f8 +; XTENSA-NEXT: abs.s f9, f8 +; XTENSA-NEXT: add.s f8, f9, f8 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %1 = fadd float %a, %b + %2 = call float @llvm.fabs.f32(float %1) + %3 = fadd float %2, %1 + ret float %3 +} + +declare float @llvm.minnum.f32(float, float) + +define float @fmin_s(float %a, float %b) nounwind { +; XTENSA-LABEL: fmin_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI6_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.minnum.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.maxnum.f32(float, float) + +define float @fmax_s(float %a, float %b) nounwind { +; XTENSA-LABEL: fmax_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded 
Spill +; XTENSA-NEXT: l32r a8, .LCPI7_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.maxnum.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.fma.f32(float, float, float) + +define float @fmadd_s(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fmadd_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: rfr a2, f10 +; XTENSA-NEXT: ret + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %1 +} + +define float @fmsub_s(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fmsub_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: l32r a8, .LCPI9_0 +; XTENSA-NEXT: wfr f8, a8 +; XTENSA-NEXT: wfr f9, a4 +; XTENSA-NEXT: add.s f8, f9, f8 +; XTENSA-NEXT: neg.s f8, f8 +; XTENSA-NEXT: wfr f9, a3 +; XTENSA-NEXT: wfr f10, a2 +; XTENSA-NEXT: madd.s f8, f10, f9 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %c_ = fadd float 0.0, %c ; avoid negation using xor + %negc = fsub float -0.0, %c_ + %1 = call float @llvm.fma.f32(float %a, float %b, float %negc) + ret float %1 +} + +define float @fnmadd_s(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmadd_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: l32r a8, .LCPI10_0 +; XTENSA-NEXT: wfr f8, a8 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: add.s f9, f9, f8 +; XTENSA-NEXT: neg.s f9, f9 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: add.s f8, f10, f8 +; XTENSA-NEXT: neg.s f8, f8 +; XTENSA-NEXT: wfr f10, a3 +; XTENSA-NEXT: madd.s f8, f9, f10 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %a_ = fadd float 0.0, %a + %c_ = fadd float 0.0, %c + %nega = fsub float -0.0, %a_ + %negc = fsub float -0.0, %c_ + %1 = call float @llvm.fma.f32(float %nega, float %b, float %negc) + ret float %1 +} + +define float @fnmadd_s_2(float %a, float %b, float %c) nounwind { +; 
XTENSA-LABEL: fnmadd_s_2: +; XTENSA: # %bb.0: +; XTENSA-NEXT: l32r a8, .LCPI11_0 +; XTENSA-NEXT: wfr f8, a8 +; XTENSA-NEXT: wfr f9, a3 +; XTENSA-NEXT: add.s f9, f9, f8 +; XTENSA-NEXT: neg.s f9, f9 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: add.s f8, f10, f8 +; XTENSA-NEXT: neg.s f8, f8 +; XTENSA-NEXT: wfr f10, a2 +; XTENSA-NEXT: madd.s f8, f10, f9 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %b_ = fadd float 0.0, %b + %c_ = fadd float 0.0, %c + %negb = fsub float -0.0, %b_ + %negc = fsub float -0.0, %c_ + %1 = call float @llvm.fma.f32(float %a, float %negb, float %negc) + ret float %1 +} + +define float @fnmadd_s_3(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmadd_s_3: +; XTENSA: # %bb.0: +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: rfr a8, f10 +; XTENSA-NEXT: l32r a9, .LCPI12_0 +; XTENSA-NEXT: xor a2, a8, a9 +; XTENSA-NEXT: ret + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + %neg = fneg float %1 + ret float %neg +} + +define float @fnmadd_nsz(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmadd_nsz: +; XTENSA: # %bb.0: +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: rfr a8, f10 +; XTENSA-NEXT: l32r a9, .LCPI13_0 +; XTENSA-NEXT: xor a2, a8, a9 +; XTENSA-NEXT: ret + %1 = call nsz float @llvm.fma.f32(float %a, float %b, float %c) + %neg = fneg nsz float %1 + ret float %neg +} + +define float @fnmsub_s(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmsub_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: l32r a8, .LCPI14_0 +; XTENSA-NEXT: wfr f8, a8 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: add.s f8, f9, f8 +; XTENSA-NEXT: neg.s f8, f8 +; XTENSA-NEXT: wfr f9, a3 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: madd.s f10, f8, f9 +; XTENSA-NEXT: rfr a2, f10 +; XTENSA-NEXT: ret + %a_ = fadd float 0.0, %a + %nega = fsub float -0.0, %a_ + %1 = call float 
@llvm.fma.f32(float %nega, float %b, float %c) + ret float %1 +} + +define float @fnmsub_s_2(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmsub_s_2: +; XTENSA: # %bb.0: +; XTENSA-NEXT: l32r a8, .LCPI15_0 +; XTENSA-NEXT: wfr f8, a8 +; XTENSA-NEXT: wfr f9, a3 +; XTENSA-NEXT: add.s f8, f9, f8 +; XTENSA-NEXT: neg.s f8, f8 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: rfr a2, f10 +; XTENSA-NEXT: ret + %b_ = fadd float 0.0, %b + %negb = fsub float -0.0, %b_ + %1 = call float @llvm.fma.f32(float %a, float %negb, float %c) + ret float %1 +} + +define float @fmadd_s_contract(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fmadd_s_contract: +; XTENSA: # %bb.0: +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: mul.s f8, f9, f8 +; XTENSA-NEXT: wfr f9, a4 +; XTENSA-NEXT: add.s f8, f8, f9 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %1 = fmul contract float %a, %b + %2 = fadd contract float %1, %c + ret float %2 +} + +define float @fmsub_s_contract(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fmsub_s_contract: +; XTENSA: # %bb.0: +; XTENSA-NEXT: l32r a8, .LCPI17_0 +; XTENSA-NEXT: wfr f8, a8 +; XTENSA-NEXT: wfr f9, a4 +; XTENSA-NEXT: add.s f8, f9, f8 +; XTENSA-NEXT: wfr f9, a3 +; XTENSA-NEXT: wfr f10, a2 +; XTENSA-NEXT: mul.s f9, f10, f9 +; XTENSA-NEXT: sub.s f8, f9, f8 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %c_ = fadd float 0.0, %c ; avoid negation using xor + %1 = fmul contract float %a, %b + %2 = fsub contract float %1, %c_ + ret float %2 +} + +define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmadd_s_contract: +; XTENSA: # %bb.0: +; XTENSA-NEXT: l32r a8, .LCPI18_0 +; XTENSA-NEXT: wfr f8, a8 +; XTENSA-NEXT: wfr f9, a3 +; XTENSA-NEXT: add.s f9, f9, f8 +; XTENSA-NEXT: wfr f10, a2 +; XTENSA-NEXT: add.s f10, f10, f8 +; XTENSA-NEXT: mul.s f9, f10, f9 +; XTENSA-NEXT: neg.s f9, f9 +; XTENSA-NEXT: wfr f10, a4 +; 
XTENSA-NEXT: add.s f8, f10, f8 +; XTENSA-NEXT: sub.s f8, f9, f8 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %a_ = fadd float 0.0, %a ; avoid negation using xor + %b_ = fadd float 0.0, %b ; avoid negation using xor + %c_ = fadd float 0.0, %c ; avoid negation using xor + %1 = fmul contract float %a_, %b_ + %2 = fneg float %1 + %3 = fsub contract float %2, %c_ + ret float %3 +} + +define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmsub_s_contract: +; XTENSA: # %bb.0: +; XTENSA-NEXT: l32r a8, .LCPI19_0 +; XTENSA-NEXT: wfr f8, a8 +; XTENSA-NEXT: wfr f9, a3 +; XTENSA-NEXT: add.s f9, f9, f8 +; XTENSA-NEXT: wfr f10, a2 +; XTENSA-NEXT: add.s f8, f10, f8 +; XTENSA-NEXT: mul.s f8, f8, f9 +; XTENSA-NEXT: wfr f9, a4 +; XTENSA-NEXT: sub.s f8, f9, f8 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: ret + %a_ = fadd float 0.0, %a ; avoid negation using xor + %b_ = fadd float 0.0, %b ; avoid negation using xor + %1 = fmul contract float %a_, %b_ + %2 = fsub contract float %c, %1 + ret float %2 +} + +declare float @llvm.powi.f32(float, i32) + +define float @powi_f32(float %a, i32 %b) nounwind { +; XTENSA-LABEL: powi_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI20_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + + %1 = call float @llvm.powi.f32(float %a, i32 %b) + ret float %1 +} + +declare float @llvm.sin.f32(float) + +define float @sin_f32(float %a) nounwind { +; XTENSA-LABEL: sin_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI21_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; 
XTENSA-NEXT: ret + %1 = call float @llvm.sin.f32(float %a) + ret float %1 +} + +declare float @llvm.cos.f32(float) + +define float @cos_f32(float %a) nounwind { +; XTENSA-LABEL: cos_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI22_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.cos.f32(float %a) + ret float %1 +} +declare float @llvm.exp.f32(float) + +define float @exp_f32(float %a) nounwind { +; XTENSA-LABEL: exp_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI23_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.exp.f32(float %a) + ret float %1 +} + +define float @log_f32(float %a) nounwind { +; XTENSA-LABEL: log_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI24_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.log.f32(float %a) + ret float %1 +} + +declare float @llvm.log10.f32(float) + +define float @log10_f32(float %a) nounwind { +; XTENSA-LABEL: log10_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI25_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 
= call float @llvm.log10.f32(float %a) + ret float %1 +} + +declare float @llvm.log2.f32(float) + +define float @log2_f32(float %a) nounwind { +; XTENSA-LABEL: log2_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI26_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.log2.f32(float %a) + ret float %1 +} + +declare float @llvm.floor.f32(float) + +define float @floor_f32(float %a) nounwind { +; XTENSA-LABEL: floor_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI27_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.floor.f32(float %a) + ret float %1 +} + +declare float @llvm.ceil.f32(float) + +define float @ceil_f32(float %a) nounwind { +; XTENSA-LABEL: ceil_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI28_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.ceil.f32(float %a) + ret float %1 +} +declare float @llvm.rint.f32(float) + +define float @rint_f32(float %a) nounwind { +; XTENSA-LABEL: rint_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI29_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, 
a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.rint.f32(float %a) + ret float %1 +} + +declare float @llvm.nearbyint.f32(float) + +define float @nearbyint_f32(float %a) nounwind { +; XTENSA-LABEL: nearbyint_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI30_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.nearbyint.f32(float %a) + ret float %1 +} + +declare float @llvm.round.f32(float) + +define float @round_f32(float %a) nounwind { +; XTENSA-LABEL: round_f32: +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI31_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call float @llvm.round.f32(float %a) + ret float %1 +} diff --git a/llvm/test/MC/Xtensa/float-err.s b/llvm/test/MC/Xtensa/float-err.s new file mode 100644 index 0000000000000..a7145f0673395 --- /dev/null +++ b/llvm/test/MC/Xtensa/float-err.s @@ -0,0 +1,37 @@ +# RUN: not llvm-mc %s -triple=xtensa -filetype=asm 2>&1 | FileCheck --implicit-check-not=error: %s + +ceil.s a2, f3, 17 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +const.s f3, 18 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +float.s f2, a3, 16 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +ufloat.s f2, a3, 25 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: expected immediate in range [0, 15] + +floor.s a2, f3, 17 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +lsi f2, a3, 4099 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction + +lsip f2, a3, 4099 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +round.s a2, f3, 20 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +ssi f2, a3, 5000 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +ssip f2, a3, 5001 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +trunc.s a2, f3, 21 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: expected immediate in range [0, 15] + +utrunc.s a2, f3, 19 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: expected immediate in range [0, 15] diff --git a/llvm/test/MC/Xtensa/float.s b/llvm/test/MC/Xtensa/float.s new file mode 100644 index 0000000000000..0c51addeb3774 --- /dev/null +++ b/llvm/test/MC/Xtensa/float.s @@ -0,0 +1,177 @@ +# RUN: llvm-mc %s -triple=xtensa -mattr=+fp -mattr=+bool -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST %s + +.align 4 +LBL0: + +# CHECK-INST: abs.s f2, f3 +# CHECK: encoding: [0x10,0x23,0xfa] + abs.s f2, f3 +# CHECK-INST: add.s f2, f3, f4 +# CHECK: encoding: [0x40,0x23,0x0a] + add.s f2, f3, f4 +# CHECK-INST: addexp.s f2, f3 +# CHECK: encoding: [0xe0,0x23,0xfa] + addexp.s f2, f3 +# CHECK-INST: addexpm.s f2, f3 +# CHECK: encoding: [0xf0,0x23,0xfa] + addexpm.s f2, f3 + +# CHECK-INST: ceil.s a2, f3, 5 +# CHECK: encoding: [0x50,0x23,0xba] + ceil.s a2, f3, 5 +# CHECK-INST: const.s f3, 5 +# CHECK: encoding: [0x30,0x35,0xfa] + const.s f3, 5 + +# CHECK-INST: div0.s f2, f3 +# CHECK: encoding: [0x70,0x23,0xfa] + div0.s f2, f3 +# CHECK-INST: divn.s f2, f3, f4 +# CHECK: encoding: [0x40,0x23,0x7a] + divn.s f2, f3, f4 + +# CHECK-INST: float.s f2, a3, 5 +# CHECK: encoding: [0x50,0x23,0xca] + float.s f2, a3, 5 +# CHECK-INST: floor.s a2, f3, 5 +# CHECK: encoding: [0x50,0x23,0xaa] + floor.s a2, f3, 5 + +# CHECK-INST: lsi f2, a3, 8 +# CHECK: encoding: [0x23,0x03,0x02] + lsi f2, a3, 8 +# CHECK-INST: lsip f2, a3, 8 +# CHECK: encoding: [0x23,0x83,0x02] + lsip f2, a3, 8 +# CHECK-INST: 
lsx f2, a3, a4 +# CHECK: encoding: [0x40,0x23,0x08] + lsx f2, a3, a4 +# CHECK-INST: lsxp f2, a3, a4 +# CHECK: encoding: [0x40,0x23,0x18] + lsxp f2, a3, a4 + +# CHECK-INST: madd.s f2, f3, f4 +# CHECK: encoding: [0x40,0x23,0x4a] + madd.s f2, f3, f4 +# CHECK-INST: maddn.s f2, f3, f4 +# CHECK: encoding: [0x40,0x23,0x6a] + maddn.s f2, f3, f4 +# CHECK-INST: mkdadj.s f2, f3 +# CHECK: encoding: [0xd0,0x23,0xfa] + mkdadj.s f2, f3 +# CHECK-INST: mksadj.s f2, f3 +# CHECK: encoding: [0xc0,0x23,0xfa] + mksadj.s f2, f3 + +# CHECK-INST: mov.s f2, f3 +# CHECK: encoding: [0x00,0x23,0xfa] + mov.s f2, f3 + +# CHECK-INST: moveqz.s f2, f3, a4 +# CHECK: encoding: [0x40,0x23,0x8b] + moveqz.s f2, f3, a4 +# CHECK-INST: movf.s f2, f3, b0 +# CHECK: encoding: [0x00,0x23,0xcb] + movf.s f2, f3, b0 +# CHECK-INST: movgez.s f2, f3, a4 +# CHECK: encoding: [0x40,0x23,0xbb] + movgez.s f2, f3, a4 +# CHECK-INST: movltz.s f2, f3, a4 +# CHECK: encoding: [0x40,0x23,0xab] + movltz.s f2, f3, a4 +# CHECK-INST: movnez.s f2, f3, a4 +# CHECK: encoding: [0x40,0x23,0x9b] + movnez.s f2, f3, a4 +# CHECK-INST: movt.s f2, f3, b0 +# CHECK: encoding: [0x00,0x23,0xdb] + movt.s f2, f3, b0 + +# CHECK-INST: msub.s f2, f3, f4 +# CHECK: encoding: [0x40,0x23,0x5a] + msub.s f2, f3, f4 +# CHECK-INST: mul.s f2, f3, f4 +# CHECK: encoding: [0x40,0x23,0x2a] + mul.s f2, f3, f4 +# CHECK-INST: neg.s f2, f3 +# CHECK: encoding: [0x60,0x23,0xfa] + neg.s f2, f3 + +# CHECK-INST: nexp01.s f2, f3 +# CHECK: encoding: [0xb0,0x23,0xfa] + nexp01.s f2, f3 + +# CHECK-INST: oeq.s b0, f2, f3 +# CHECK: encoding: [0x30,0x02,0x2b] + oeq.s b0, f2, f3 +# CHECK-INST: ole.s b0, f2, f3 +# CHECK: encoding: [0x30,0x02,0x6b] + ole.s b0, f2, f3 +# CHECK-INST: olt.s b0, f2, f3 +# CHECK: encoding: [0x30,0x02,0x4b] + olt.s b0, f2, f3 + +# CHECK-INST: recip0.s f2, f3 +# CHECK: encoding: [0x80,0x23,0xfa] + recip0.s f2, f3 + +# CHECK-INST: rfr a2, f3 +# CHECK: encoding: [0x40,0x23,0xfa] + rfr a2, f3 + +# CHECK-INST: round.s a2, f3, 5 +# CHECK: encoding: 
[0x50,0x23,0x8a] + round.s a2, f3, 5 +# CHECK-INST: rsqrt0.s f2, f3 +# CHECK: encoding: [0xa0,0x23,0xfa] + rsqrt0.s f2, f3 +# CHECK-INST: sqrt0.s f2, f3 +# CHECK: encoding: [0x90,0x23,0xfa] + sqrt0.s f2, f3 + +# CHECK-INST: ssi f2, a3, 8 +# CHECK: encoding: [0x23,0x43,0x02] + ssi f2, a3, 8 +# CHECK-INST: ssip f2, a3, 8 +# CHECK: encoding: [0x23,0xc3,0x02] + ssip f2, a3, 8 +# CHECK-INST: ssx f2, a3, a4 +# CHECK: encoding: [0x40,0x23,0x48] + ssx f2, a3, a4 +# CHECK-INST: ssxp f2, a3, a4 +# CHECK: encoding: [0x40,0x23,0x58] + ssxp f2, a3, a4 + +# CHECK-INST: sub.s f2, f3, f4 +# CHECK: encoding: [0x40,0x23,0x1a] + sub.s f2, f3, f4 + +# CHECK-INST: trunc.s a2, f3, 5 +# CHECK: encoding: [0x50,0x23,0x9a] + trunc.s a2, f3, 5 + +# CHECK-INST: ueq.s b0, f2, f3 +# CHECK: encoding: [0x30,0x02,0x3b] + ueq.s b0, f2, f3 + +# CHECK-INST: ufloat.s f2, a3, 5 +# CHECK: encoding: [0x50,0x23,0xda] + ufloat.s f2, a3, 5 + +# CHECK-INST: ule.s b0, f2, f3 +# CHECK: encoding: [0x30,0x02,0x7b] + ule.s b0, f2, f3 +# CHECK-INST: ult.s b0, f2, f3 +# CHECK: encoding: [0x30,0x02,0x5b] + ult.s b0, f2, f3 +# CHECK-INST: un.s b0, f2, f3 +# CHECK: encoding: [0x30,0x02,0x1b] + un.s b0, f2, f3 + +# CHECK-INST: utrunc.s a2, f3, 5 +# CHECK: encoding: [0x50,0x23,0xea] + utrunc.s a2, f3, 5 + +# CHECK-INST: wfr f2, a3 +# CHECK: encoding: [0x50,0x23,0xfa] + wfr f2, a3